aboutsummaryrefslogtreecommitdiff
path: root/potpourri/validation-status-sql.py
blob: 646d7d9b2e200c5439981fb399bb7c2ed78cec8f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888 } /* Comment */
.highlight .err { color: #A61717; background-color: #E3D2D2 } /* Error */
.highlight .k { color: #080; font-weight: bold } /* Keyword */
.highlight .ch { color: #888 } /* Comment.Hashbang */
.highlight .cm { color: #888 } /* Comment.Multiline */
.highlight .cp { color: #C00; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888 } /* Comment.Single */
.highlight .cs { color: #C00; font-weight: bold; background-color: #FFF0F0 } /* Comment.Special */
.highlight .gd { color: #000; background-color: #FDD } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #A00 } /* Generic.Error */
.highlight .gh { color: #333 } /* Generic.Heading */
.highlight .gi { color: #000; background-color: #DFD } /* Generic.Inserted */
.highlight .go { color: #888 } /* Generic.Output */
.highlight .gp { color: #555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666 } /* Generic.Subheading */
.highlight .gt { color: #A00 } /* Generic.Traceback */
.highlight .kc { color: #080; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #080; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #080; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #080 } /* Keyword.Pseudo */
.highlight .kr { color: #080; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #00D; font-weight: bold } /* Literal.Number */
.highlight .s { color: #D20; background-color: #FFF0F0 } /* Literal.String */
.highlight .na { color: #369 } /* Name.Attribute */
.highlight .nb { color: #038 } /* Name.Builtin */
.highlight .nc { color: #B06; font-weight: bold } /* Name.Class */
.highlight .no { color: #036; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555 } /* Name.Decorator */
.highlight .ne { color: #B06; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #06B; font-weight: bold } /* Name.Function */
.highlight .nl { color: #369; font-style: italic } /* Name.Label */
.highlight .nn { color: #B06; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #369; font-weight: bold } /* Name.Property */
.highlight .nt { color: #B06; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #369 } /* Name.Variable */
.highlight .ow { color: #080 } /* Operator.Word */
.highlight .w { color: #BBB } /* Text.Whitespace */
.highlight .mb { color: #00D; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #00D; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #00D; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #00D; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #00D; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #D20; background-color: #FFF0F0 } /* Literal.String.Affix */
.highlight .sb { color: #D20; background-color: #FFF0F0 } /* Literal.String.Backtick */
.highlight .sc { color: #D20; background-color: #FFF0F0 } /* Literal.String.Char */
.highlight .dl { color: #D20; background-color: #FFF0F0 } /* Literal.String.Delimiter */
.highlight .sd { color: #D20; background-color: #FFF0F0 } /* Literal.String.Doc */
.highlight .s2 { color: #D20; background-color: #FFF0F0 } /* Literal.String.Double */
.highlight .se { color: #04D; background-color: #FFF0F0 } /* Literal.String.Escape */
.highlight .sh { color: #D20; background-color: #FFF0F0 } /* Literal.String.Heredoc */
.highlight .si { color: #33B; background-color: #FFF0F0 } /* Literal.String.Interpol */
.highlight .sx { color: #2B2; background-color: #F0FFF0 } /* Literal.String.Other */
.highlight .sr { color: #080; background-color: #FFF0FF } /* Literal.String.Regex */
.highlight .s1 { color: #D20; background-color: #FFF0F0 } /* Literal.String.Single */
.highlight .ss { color: #A60; background-color: #FFF0F0 } /* Literal.String.Symbol */
.highlight .bp { color: #038 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #06B; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #369 } /* Name.Variable.Class */
.highlight .vg { color: #D70 } /* Name.Variable.Global */
.highlight .vi { color: #33B } /* Name.Variable.Instance */
.highlight .vm { color: #369 } /* Name.Variable.Magic */
.highlight .il { color: #00D; font-weight: bold } /* Literal.Number.Integer.Long */
****** RPKI Tools Manual ******

This collection of tools implements both the production (CA) and relying party
(RP) sides of an RPKI environment.

The Subversion repository for the entire project is available for (read-only)
anonymous access at http://subvert-rpki.hactrn.net/.

If you just want to browse the code you might find the Trac source code browser
interface more convenient.

***** Download *****

The recommended way to obtain the code is via subversion. To download, do:

  $ svn checkout http://subvert-rpki.hactrn.net/trunk/

Nightly snapshots of trunk are also available from http://download.rpki.net/ as
xz-compressed tarballs.

***** Installation *****

See the instal
#!/usr/bin/env python

# $Id$
#
# Copyright (C) 2013-2014 Dragon Research Labs ("DRL")
# Portions copyright (C) 2011-2012  Internet Systems Consortium ("ISC")
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notices and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND DRL AND ISC DISCLAIM ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL DRL OR
# ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

"""
Parse rcynic XML output, stuff the data that validation_status script
would print into an SQL database for subsequent analysis.
"""

import os
import sys
import time
import mailbox
import sqlite3
import argparse
import lxml.etree
import subprocess

class Parser(object):

    @staticmethod
    def main():
        parser = argparse.ArgumentParser(
            description     = __doc__,
            formatter_class = argparse.ArgumentDefaultsHelpFormatter)
        group = parser.add_mutually_exclusive_group(required = True)
        group.add_argument("--mailbox", "--mb",
                           help = "Maildir mailbox containing rcynic XML output")
        group.add_argument("--tarballs",
                           help = "directory tree of tar files containing containing rcynic XML output")
        parser.add_argument("--database", "--db",
                            default = "validation-status-sql.db",
                            help = "SQLite3 database")
        parser.add_argument("--path-within-tarball",
                            default = "var/rcynic/data/rcynic.xml",
                            help = "rcynic.xml path name within tarball(s)")
        parser.add_argument("--tar-extensions", nargs = "+",
                            default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(),
                            help = "extensions to recognize as indicating tar files")
        args = parser.parse_args()
        if args.mailbox:
            ParserMailbox(args)
        else:
            ParserTarball(args)

    def __init__(self, args):
        self.args = args
        self.init_sql()
        self.init_hook()
        self.index1()
        self.parsed = 1
        for self.current, self.iterval in enumerate(self.iterator, 1):
            self.parse_xml()
        if self.parsed > 1:
            sys.stderr.write("\n")
        self.index2()
        self.db.close()


    def init_sql(self):
        creating = not os.path.exists(self.args.database)
        self.db = sqlite3.connect(self.args.database)
        self.db.text_factory = str
        self.db.executescript('''
          PRAGMA foreign_keys  = off;
          PRAGMA synchronous   = off;
          PRAGMA count_changes = off;
          ''')

        if creating:
            self.db.executescript('''
                CREATE TABLE sessions (
                      session_id      INTEGER PRIMARY KEY NOT NULL,
                      session         DATETIME NOT NULL,
                      handle          TEXT NOT NULL
                );

                CREATE TABLE uris (
                      uri_id          INTEGER PRIMARY KEY NOT NULL,
                      uri             TEXT NOT NULL
                );

                CREATE TABLE codes (
                      code_id         INTEGER PRIMARY KEY NOT NULL,
                      code            TEXT NOT NULL
                );

                CREATE TABLE generations (
                      generation_id   INTEGER PRIMARY KEY NOT NULL,
                      generation      TEXT NOT NULL
                );

                CREATE TABLE events (
                      id              INTEGER PRIMARY KEY NOT NULL,
                      timestamp       DATETIME NOT NULL,
                      session_id      INTEGER NOT NULL REFERENCES sessions    (session_id)      ON DELETE RESTRICT ON UPDATE RESTRICT,
                      generation_id   INTEGER NOT NULL REFERENCES generations (generation_id)   ON DELETE RESTRICT ON UPDATE RESTRICT,
                      code_id         INTEGER NOT NULL REFERENCES codes       (code_id)         ON DELETE RESTRICT ON UPDATE RESTRICT,
                      uri_id          INTEGER NOT NULL REFERENCES uris        (uri_id)          ON DELETE RESTRICT ON UPDATE RESTRICT
                );

                CREATE VIEW status AS
                     SELECT id, handle, session, timestamp, generation, code, uri
                     FROM               events
                     NATURAL JOIN       sessions
                     NATURAL JOIN       uris
                     NATURAL JOIN       codes
                     NATURAL JOIN       generations;
                ''')


    def index1(self):
        self.db.executescript('''
          CREATE UNIQUE INDEX IF NOT EXISTS sessions_index         ON sessions     (session);
          CREATE UNIQUE INDEX IF NOT EXISTS handles_index          ON sessions     (handle);
          CREATE UNIQUE INDEX IF NOT EXISTS uris_index             ON uris         (uri);
          CREATE UNIQUE INDEX IF NOT EXISTS codes_index            ON codes        (code);
          CREATE UNIQUE INDEX IF NOT EXISTS generations_index      ON generations  (generation);
          ''')


    def index2(self):
        self.db.executescript('''
          CREATE UNIQUE INDEX IF NOT EXISTS events_index           ON events       (uri_id, timestamp, code_id, generation_id);
          ''')


    def string_id(self, table, value):
        field = table.rstrip("s")
        try:
            return self.db.execute("SELECT %s_id FROM %s WHERE %s = ?" % (field, table, field), (value,)).fetchone()[0]
        except:
            return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid


    def parse_xml(self):
        sys.stderr.write("\r%s %d/%d/%d...%s   " % ("|\\-/"[self.current & 3],
                                                    self.current, self.parsed, self.total, self.handle))
        if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone():
            return
        xml = self.read_xml()
        with self.db:
            session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (strftime('%s', ?), ?)",
                                         (xml.get("date"), self.handle)).lastrowid
            self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) "
                                "VALUES (?, strftime('%s', ?), ?, ?, ?)",
                                ((session_id,
                                  x.get("timestamp"),
                                  self.string_id("generations", x.get("generation", "none")),
                                  self.string_id("codes",       x.get("status")),
                                  self.string_id("uris",        x.text.strip()))
                                 for x in xml.findall("validation_status")))
        self.parsed += 1


class ParserTarball(Parser):

    def init_hook(self):
        self.total = 0
        for fn in self.iter_tarball_names():
            self.total += 1
        self.iterator = self.iter_tarball_names()

    @property
    def handle(self):
        return self.iterval

    def read_xml(self):
        return lxml.etree.ElementTree(
            file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball),
                                    stdout = subprocess.PIPE).stdout).getroot()

    def iter_tarball_names(self):
        if os.path.isdir(self.args.tarballs):
            for root, dirs, files in os.walk(self.args.tarballs):
                for fn in files:
                    if any(fn.endswith(ext) for ext in self.args.tar_extensions):
                        yield os.path.join(root, fn)
        else:
            yield self.args.tarballs


class ParserMailbox(Parser):

    def init_hook(self):
        self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False)
        self.total = len(self.mb)
        self.iterator = self.mb.iterkeys()

    @property
    def handle(self):
        return self.mb[self.iterval].get("Message-ID")

    def read_xml(self):
        return lxml.etree.XML(self.mb[self.iterval].get_payload())


if __name__ == "__main__":
    try:
        Parser.main()
    except KeyboardInterrupt:
        pass