aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorRob Austein <sra@hactrn.net>2012-05-11 20:17:11 +0000
committerRob Austein <sra@hactrn.net>2012-05-11 20:17:11 +0000
commit0d03c783bbeafca23558a79c09735f92580c4384 (patch)
tree8486c479e442fa16c5fa1a4cc3e3665f51c6adad /scripts
parentf9ff6d1713750cae4062d2a7a38cab4119642936 (diff)
Checkpoint of a tool to convert rcynic.xml to rrdtool input. Not
usable yet. svn path=/trunk/; revision=4486
Diffstat (limited to 'scripts')
-rw-r--r--scripts/rrd-rcynic-history.py188
1 files changed, 188 insertions, 0 deletions
diff --git a/scripts/rrd-rcynic-history.py b/scripts/rrd-rcynic-history.py
new file mode 100644
index 00000000..25598ac1
--- /dev/null
+++ b/scripts/rrd-rcynic-history.py
@@ -0,0 +1,188 @@
+"""
+Parse traffic data out of rcynic XML output, whack it a bit, and stuff
+it into one or more RRDs (Round Robin Databases -- see rrdtool).
+
+Haven't decided yet whether to draw the resulting pictures here or
+elsewhere.
+
+This is an initial adaptation of analyze-rcynic-history.py, which uses
+gnuplot and a shelve database. It's also my first attempt to do
+anything with rrdtool, so no doubt I'll get half of it wrong.
+
+$Id$
+
+Copyright (C) 2011-2012 Internet Systems Consortium ("ISC")
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+"""
+
+import mailbox
+import sys
+import urlparse
+import os
+import time
+
+from xml.etree.cElementTree import (ElementTree as ElementTree,
+ fromstring as ElementTreeFromString)
+
+# Force the process time zone to UTC before any local-time conversion
+# (e.g. time.mktime) is performed.  Assign through os.environ rather
+# than calling os.putenv() directly: the assignment updates the C
+# environment as well and keeps os.environ itself in sync.
+os.environ["TZ"] = "UTC"
+time.tzset()
+
+def parse_utc(s):
+    """
+    Convert a UTC timestamp of the form "YYYY-MM-DDTHH:MM:SSZ" into
+    integer seconds since the Unix epoch.
+
+    Raises ValueError if s does not match that format.
+    """
+    # Function-scope import: calendar is not among this file's
+    # top-level imports.
+    import calendar
+    # timegm() interprets the struct_time as UTC regardless of the
+    # process TZ setting, whereas mktime() interprets it as local time.
+    return int(calendar.timegm(time.strptime(s, "%Y-%m-%dT%H:%M:%SZ")))
+
+class Host(object):
+    """
+    A host object represents all the data collected for one host for a
+    given session.
+    """
+
+    def __init__(self, hostname, session_timestamp):
+        """
+        Start an empty record for hostname within the session identified
+        by session_timestamp.
+        """
+        self.hostname = hostname
+        self.session_timestamp = session_timestamp
+        # Sum of per-connection durations, in seconds.
+        self.elapsed = 0
+        self.connection_count = 0
+        # Number of connections whose element carried an "error" attribute.
+        self.dead_connections = 0
+        # Distinct object URIs seen for this host; dropped by finalize().
+        self.uris = set()
+        # Accumulates the same per-connection durations as self.elapsed.
+        self.total_connection_time = 0
+        # Defined up front so the derived properties are usable (and
+        # report "U") before finalize() has run.
+        self.object_count = 0
+
+    def add_connection(self, elt):
+        """
+        Account for one connection element: its "started" and "finished"
+        attributes give the duration, and an "error" attribute marks the
+        connection as dead.
+        """
+        elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started"))
+        self.connection_count += 1
+        self.elapsed += elapsed
+        self.total_connection_time += elapsed
+        if elt.get("error") is not None:
+            self.dead_connections += 1
+
+    def add_object_uri(self, u):
+        """
+        Record object URI u; duplicates are collapsed by the set.
+        """
+        self.uris.add(u)
+
+    def finalize(self):
+        """
+        Freeze object_count to the number of distinct URIs collected and
+        release the URI set.  Safe to call more than once: later calls
+        find no URI set and leave object_count untouched.
+        """
+        uris = self.__dict__.pop("uris", None)
+        if uris is not None:
+            self.object_count = len(uris)
+
+    def safe_division(self, numerator, denominator):
+        """
+        Return numerator / denominator as a float, or the string "U"
+        (presumably rrdtool's "unknown" marker -- confirm against the
+        rrdupdate documentation) if this host had any dead connection or
+        the denominator is zero.
+        """
+        if self.failed:
+            return "U"
+        try:
+            return float(numerator) / float(denominator)
+        except ZeroDivisionError:
+            return "U"
+
+    @property
+    def failed(self):
+        """
+        1 if any connection to this host was dead, otherwise 0.
+        """
+        return 1 if self.dead_connections else 0
+
+    @property
+    def seconds_per_object(self):
+        """
+        Elapsed seconds divided by object count, or "U".
+        """
+        return self.safe_division(self.elapsed, self.object_count)
+
+    @property
+    def objects_per_connection(self):
+        """
+        Object count divided by connection count, or "U".
+        """
+        return self.safe_division(self.object_count, self.connection_count)
+
+    @property
+    def average_connection_time(self):
+        """
+        Total connection time divided by connection count, or "U".
+        """
+        return self.safe_division(self.total_connection_time, self.connection_count)
+
+    def save(self, rrdtable):
+        """
+        Finalize this host and append one data tuple for it to rrdtable
+        via rrdtable.add(hostname, data).  The tuple starts with the
+        session timestamp.
+        """
+        self.finalize()
+        rrdtable.add(self.hostname,
+                     (self.session_timestamp,
+                      self.connection_count,
+                      self.object_count,
+                      self.objects_per_connection,
+                      self.seconds_per_object,
+                      self.average_connection_time,
+                      self.failed))
+
+class Session(dict):
+    """
+    A session corresponds to one XML file.  This is a dictionary of Host
+    objects, keyed by hostname.
+    """
+
+    def __init__(self, session_timestamp):
+        """
+        Create an empty session stamped with session_timestamp, which is
+        handed to every Host created by this session.
+        """
+        dict.__init__(self)
+        self.session_timestamp = session_timestamp
+
+    @property
+    def hostnames(self):
+        """
+        Set of hostnames seen so far in this session.
+        """
+        return set(self)
+
+    def add_connection(self, elt):
+        """
+        Record one connection element.  The element text is parsed as a
+        URI and the connection is credited to that URI's host, creating
+        the Host entry on first sight.
+
+        Elements with no text, or whose text has no hostname, are
+        skipped, matching what add_object_uri() does; otherwise they
+        would end up filed under a None key.
+        """
+        hostname = urlparse.urlparse((elt.text or "").strip()).hostname
+        if not hostname:
+            return
+        if hostname not in self:
+            self[hostname] = Host(hostname, self.session_timestamp)
+        self[hostname].add_connection(elt)
+
+    def add_object_uri(self, u):
+        """
+        Credit object URI u to its host.  URIs without a hostname, or
+        for hosts with no recorded connection, are ignored.
+        """
+        h = urlparse.urlparse(u).hostname
+        if h and h in self:
+            self[h].add_object_uri(u)
+
+    def save(self, rrdtable):
+        """
+        Save every Host in this session into rrdtable.
+        """
+        for h in self.values():
+            h.save(rrdtable)
+
+class RRDTable(dict):
+    """
+    Buffer for the data destined for rrdtool: a dictionary mapping each
+    hostname to a list of data tuples.  Everything is held until the end
+    because it has to be sorted before it is written out.  Sorting the
+    maildir instead might be easier, as might dropping the maildir
+    altogether once we're dealing with current data.  We'll see.
+    """
+
+    def add(self, hostname, data):
+        """
+        Append data to the list kept for hostname, creating the list on
+        first use.
+        """
+        self.setdefault(hostname, []).append(data)
+
+    def sort(self):
+        """
+        Sort each host's list of data tuples in place.
+        """
+        for series in self.values():
+            series.sort()
+
+    def save(self):
+        """
+        Print one "rrdtool update" line per buffered data tuple, with
+        the tuple's fields joined by colons.
+        """
+        for hostname, series in self.items():
+            for row in series:
+                fields = ":".join(str(field) for field in row)
+                print("rrdtool update %s.rrd %s" % (hostname, fields))
+
+
+# Maildir whose messages each carry one rcynic XML document as payload.
+mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False)
+
+rrdtable = RRDTable()
+
+# Count the messages once; the total only feeds the progress display,
+# so there is no need to ask the mailbox again on every iteration.
+total = len(mb)
+
+for i, key in enumerate(mb.iterkeys(), 1):
+    # Progress spinner on stderr, redrawn in place via "\r".
+    sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, total))
+
+    # Fetch the message once and reuse it for both the sanity check and
+    # the payload, rather than looking it up twice.
+    msg = mb[key]
+    assert not msg.is_multipart()
+    root = ElementTreeFromString(msg.get_payload())
+    date = root.get("date")
+    sys.stderr.write("%s..." % date)
+    session = Session(parse_utc(date))
+    for elt in root.findall("rsync_history"):
+        session.add_connection(elt)
+    for elt in root.findall("validation_status"):
+        # Only objects from the "current" generation are counted.
+        if elt.get("generation") == "current":
+            session.add_object_uri(elt.text.strip())
+    session.save(rrdtable)
+
+    # XXX Development limit: stop after the first five messages.
+    if i > 4:
+        break
+
+sys.stderr.write("\n")
+
+print
+print
+
+rrdtable.sort()
+rrdtable.save()