#!/usr/bin/env python

# $Id$
#
# Copyright (C) 2013-2014 Dragon Research Labs ("DRL")
# Portions copyright (C) 2011-2012  Internet Systems Consortium ("ISC")
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notices and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND DRL AND ISC DISCLAIM ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL DRL OR
# ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

"""
Parse rcynic XML output, stuff the data that validation_status script
would print into an SQL database for subsequent analysis.
"""

import os
import sys
import time
import mailbox
import sqlite3
import argparse
import lxml.etree
import subprocess

class Parser(object):

  @staticmethod
  def main():
    parser = argparse.ArgumentParser(
      description     = __doc__,
      formatter_class = argparse.ArgumentDefaultsHelpFormatter)
    group = parser.add_mutually_exclusive_group(required = True)
    group.add_argument("--mailbox", "--mb",
                       help = "Maildir mailbox containing rcynic XML output")
    group.add_argument("--tarballs",
                       help = "directory tree of tar files containing containing rcynic XML output")
    parser.add_argument("--database", "--db",
                        default = "validation-status-sql.db",
                        help = "SQLite3 database")
    parser.add_argument("--path-within-tarball",
                        default = "var/rcynic/data/rcynic.xml",
                        help = "rcynic.xml path name within tarball(s)")
    parser.add_argument("--tar-extensions", nargs = "+",
                        default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(),
                        help = "extensions to recognize as indicating tar files")
    args = parser.parse_args()
    if args.mailbox:
      ParserMailbox(args)
    else:
      ParserTarball(args)

  def __init__(self, args):
    self.args = args
    self.init_sql()
    self.init_hook()
    self.index1()
    self.parsed = 1
    for self.current, self.iterval in enumerate(self.iterator, 1):
      self.parse_xml()
    if self.parsed > 1:
      sys.stderr.write("\n")
    self.index2()
    self.db.close()


  def init_sql(self):
    creating = not os.path.exists(self.args.database)
    self.db = sqlite3.connect(self.args.database)
    self.db.text_factory = str
    self.db.executescript('''
      PRAGMA foreign_keys  = off;
      PRAGMA synchronous   = off;
      PRAGMA count_changes = off;
      ''')

    if creating:
      self.db.executescript('''
        CREATE TABLE sessions (
              session_id      INTEGER PRIMARY KEY NOT NULL,
              session         DATETIME NOT NULL,
              handle          TEXT NOT NULL
        );

        CREATE TABLE uris (
              uri_id          INTEGER PRIMARY KEY NOT NULL,
              uri             TEXT NOT NULL
        );

        CREATE TABLE codes (
              code_id         INTEGER PRIMARY KEY NOT NULL,
              code            TEXT NOT NULL
        );

        CREATE TABLE generations (
              generation_id   INTEGER PRIMARY KEY NOT NULL,
              generation      TEXT NOT NULL
        );

        CREATE TABLE events (
              id              INTEGER PRIMARY KEY NOT NULL,
              timestamp       DATETIME NOT NULL,
              session_id      INTEGER NOT NULL REFERENCES sessions    (session_id)      ON DELETE RESTRICT ON UPDATE RESTRICT,
              generation_id   INTEGER NOT NULL REFERENCES generations (generation_id)   ON DELETE RESTRICT ON UPDATE RESTRICT,
              code_id         INTEGER NOT NULL REFERENCES codes       (code_id)         ON DELETE RESTRICT ON UPDATE RESTRICT,
              uri_id          INTEGER NOT NULL REFERENCES uris        (uri_id)          ON DELETE RESTRICT ON UPDATE RESTRICT
        );

        CREATE VIEW status AS
             SELECT id, handle, session, timestamp, generation, code, uri
             FROM               events
             NATURAL JOIN       sessions
             NATURAL JOIN       uris
             NATURAL JOIN       codes
             NATURAL JOIN       generations;
        ''')


  def index1(self):
    self.db.executescript('''
      CREATE UNIQUE INDEX IF NOT EXISTS sessions_index         ON sessions     (session);
      CREATE UNIQUE INDEX IF NOT EXISTS handles_index          ON sessions     (handle);
      CREATE UNIQUE INDEX IF NOT EXISTS uris_index             ON uris         (uri);
      CREATE UNIQUE INDEX IF NOT EXISTS codes_index            ON codes        (code);
      CREATE UNIQUE INDEX IF NOT EXISTS generations_index      ON generations  (generation);
      ''')


  def index2(self):
    self.db.executescript('''
      CREATE UNIQUE INDEX IF NOT EXISTS events_index           ON events       (uri_id, timestamp, code_id, generation_id);
      ''')


  def string_id(self, table, value):
    field = table.rstrip("s")
    try:
      return self.db.execute("SELECT %s_id FROM %s WHERE %s = ?" % (field, table, field), (value,)).fetchone()[0]
    except:
      return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid


  def parse_xml(self):
    sys.stderr.write("\r%s %d/%d/%d...%s   " % ("|\\-/"[self.current & 3],
                                                self.current, self.parsed, self.total, self.handle))
    if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone():
      return
    xml = self.read_xml()
    with self.db:
      session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (strftime('%s', ?), ?)",
                                   (xml.get("date"), self.handle)).lastrowid
      self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) "
                          "VALUES (?, strftime('%s', ?), ?, ?, ?)",
                          ((session_id,
                            x.get("timestamp"),
                            self.string_id("generations", x.get("generation", "none")),
                            self.string_id("codes",       x.get("status")),
                            self.string_id("uris",        x.text.strip()))
                           for x in xml.findall("validation_status")))
    self.parsed += 1


class ParserTarball(Parser):

  def init_hook(self):
    self.total = 0
    for fn in self.iter_tarball_names():
      self.total += 1
    self.iterator = self.iter_tarball_names()

  @property
  def handle(self):
    return self.iterval

  def read_xml(self):
    return lxml.etree.ElementTree(
      file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball),
                              stdout = subprocess.PIPE).stdout).getroot()

  def iter_tarball_names(self):
    if os.path.isdir(self.args.tarballs):
      for root, dirs, files in os.walk(self.args.tarballs):
        for fn in files:
          if any(fn.endswith(ext) for ext in self.args.tar_extensions):
            yield os.path.join(root, fn)
    else:
      yield self.args.tarballs


class ParserMailbox(Parser):

  def init_hook(self):
    self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False)
    self.total = len(self.mb)
    self.iterator = self.mb.iterkeys()

  @property
  def handle(self):
    return self.mb[self.iterval].get("Message-ID")

  def read_xml(self):
    return lxml.etree.XML(self.mb[self.iterval].get_payload())


if __name__ == "__main__":
  try:
    Parser.main()
  except KeyboardInterrupt:
    pass