aboutsummaryrefslogtreecommitdiff
path: root/scripts/analyze-rcynic-history.py
diff options
context:
space:
mode:
author Rob Austein <sra@hactrn.net> 2011-10-25 05:31:13 +0000
committer Rob Austein <sra@hactrn.net> 2011-10-25 05:31:13 +0000
commit 43f282c891680cb348a5cf44e036e465290a4a74 (patch)
tree d05641d1e36b20188a03f4c0d790774b27a0b530 /scripts/analyze-rcynic-history.py
parent f3d312970304a83c377d5c9b30ae2aa56a529b6d (diff)
New script to generate tables and graphs from rcynic.xml traffic
data. Still very rough, output somewhat suspect. svn path=/trunk/; revision=4061
Diffstat (limited to 'scripts/analyze-rcynic-history.py')
-rw-r--r--scripts/analyze-rcynic-history.py264
1 files changed, 264 insertions, 0 deletions
diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py
new file mode 100644
index 00000000..3f421c4d
--- /dev/null
+++ b/scripts/analyze-rcynic-history.py
@@ -0,0 +1,264 @@
+"""
+Parse traffic data out of rcynic XML output, whack it a bit, print some
+summaries and run gnuplot to draw some pictures.
+
+$Id$
+
+Copyright (C) 2011 Internet Systems Consortium ("ISC")
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+"""
+
+show_summary = True  # print the table aggregated over all sessions
+show_sessions = False  # also print one table per individual session
+show_plot = True  # emit gnuplot commands for the plotted Host fields
+plot_to_pdf = True  # have gnuplot write '<field>.pdf'; when False, pause after each plot instead
+plot_to_file = not plot_to_pdf  # write the commands to foo.plot rather than piping them to gnuplot
+plot_all_hosts = False  # plot every hostname in the summary rather than the fixed list below
+
+import mailbox, sys, urlparse, os, getopt, datetime, subprocess
+
+from xml.etree.cElementTree import (ElementTree as ElementTree,
+ fromstring as ElementTreeFromString)
+
class Rsync_History(object):
    """
    One rsync transfer record, built from an XML element.

    Attributes set by the constructor:

    started, finished -- datetime objects parsed from the element's
                         "started" and "finished" attributes
    error             -- value of the "error" attribute, or None
    uri               -- stripped element text ("" if there is none)
    hostname          -- host part of uri, or None if it has none
    elapsed           -- finished - started, as a timedelta

    Instances order by (started, finished, hostname).
    """

    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self, elt):
        """
        Extract the fields from elt, an ElementTree element.

        Raises whatever strptime() raises if "started" or "finished"
        is missing or does not match timestamp_format.
        """
        parse = datetime.datetime.strptime
        self.started = parse(elt.get("started"), self.timestamp_format)
        self.finished = parse(elt.get("finished"), self.timestamp_format)
        self.error = elt.get("error")
        # ElementTree reports the text of an empty element as None.
        self.uri = (elt.text or "").strip()
        self.hostname = urlparse.urlparse(self.uri).hostname or None
        self.elapsed = self.finished - self.started

    def _sort_key(self):
        """Return the tuple all comparisons are based on."""
        # A missing hostname is mapped to "" so that it still sorts
        # first but never has to be compared against a string.
        return (self.started, self.finished, self.hostname or "")

    # Rich comparisons are spelled out because __cmp__ alone is only
    # consulted by Python 2.

    def __eq__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() == other._sort_key()

    def __ne__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() != other._sort_key()

    def __lt__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, Rsync_History):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__(self):
        # Kept consistent with __eq__.
        return hash(self._sort_key())

    def __cmp__(self, other):
        """Three-way comparison, kept for code that calls it directly."""
        mine = self._sort_key()
        theirs = other._sort_key()
        return (mine > theirs) - (mine < theirs)
+
class Host(object):
    """
    Accumulated rsync statistics for a single hostname.

    Counters are filled in by add_rsync_history() and add_uri().  Two
    records for the same hostname can be combined with "+", which
    yields a new record and leaves both operands untouched.  str()
    renders one table row laid out to line up under Host.header.
    """

    def __init__(self, hostname, session_id=None):
        """
        Create an empty record for hostname.

        If session_id is given it becomes the only entry of
        session_ids; otherwise session_ids starts out empty.
        """
        self.hostname = hostname
        self.session_ids = [] if session_id is None else [session_id]
        self.elapsed = datetime.timedelta(0)
        self.connection_count = 0
        self.live_connections = 0
        self.uris = set()

    def __add__(self, other):
        """Return a new Host holding the combined totals of self and other."""
        assert self.hostname == other.hostname
        result = self.__class__(self.hostname)
        for a in ("elapsed", "connection_count", "live_connections", "session_ids"):
            setattr(result, a, getattr(self, a) + getattr(other, a))
        # URIs are merged as a set, so a URI seen by both counts once.
        result.uris = self.uris | other.uris
        return result

    def add_rsync_history(self, h):
        """
        Account for one connection described by h.

        Reads h.elapsed and h.error; a connection counts as live when
        h.error is None.
        """
        self.connection_count += 1
        self.elapsed += h.elapsed
        self.live_connections += int(h.error is None)

    def add_uri(self, u):
        """Record URI u as an object belonging to this host."""
        self.uris.add(u)

    @property
    def session_id(self):
        """The single session id; only valid when there is exactly one."""
        assert len(self.session_ids) == 1
        return self.session_ids[0]

    @property
    def session_count(self):
        """Number of session ids recorded."""
        return len(self.session_ids)

    @property
    def object_count(self):
        """Number of distinct URIs recorded."""
        return len(self.uris)

    # The ratio properties below raise ZeroDivisionError while their
    # denominator is still zero; Format.__call__ turns that into a
    # placeholder when rendering.

    @property
    def success_rate_percentage(self):
        """Live connections as a percentage of all connections, truncated to int."""
        return int((float(self.live_connections) / float(self.connection_count)) * 100)

    @property
    def seconds_per_object(self):
        """Elapsed seconds divided by (object_count * session_count)."""
        return self.elapsed.total_seconds() / float(self.object_count * self.session_count)

    @property
    def objects_per_connection(self):
        """(object_count * session_count) divided by connection_count."""
        return float(self.object_count * self.session_count) / float(self.connection_count)

    @property
    def scaled_connections(self):
        """Connections per session, as a float."""
        return float(self.connection_count) / float(self.session_count)

    @property
    def scaled_elapsed(self):
        """Elapsed time per session, as a timedelta."""
        return self.elapsed / self.session_count

    class Format(object):
        """
        Description of one table column: the Host attribute to show,
        the column title, and the %-conversion used to render it.
        """

        def __init__(self, attr, title, fmt):
            """
            attr  -- name of the Host attribute or property to render
            title -- column heading; its length is the column width
            fmt   -- tail of a %-format ("d", ".3f", "d%%", ...)
            """
            self.attr = attr
            self.title = title
            # A trailing "%%" prints one extra character, so the
            # numeric part gets one column less than the title.
            self.width = len(title) - int("%" in fmt)
            self.fmt = "%%%d%s" % (self.width, fmt)
            # The placeholder has to fill the whole column, so it is
            # sized from the title and not from the reduced width.
            self.oops = "*" * len(title)

        def __call__(self, obj):
            """Return obj's value for this column as a padded string."""
            try:
                return self.fmt % getattr(obj, self.attr)
            except ZeroDivisionError:
                return self.oops

    # Columns in output order.
    format = (Format("scaled_elapsed", "Rsync Time", ".10s"),
              Format("scaled_connections", "Connections", "d"),
              Format("object_count", "Objects", "d"),
              Format("objects_per_connection", "Objects/Connection", ".3f"),
              Format("seconds_per_object", "Seconds/Object", ".3f"),
              Format("success_rate_percentage", "Success Rate", "d%%"),
              Format("hostname", "Hostname", "s"))

    separator = " " * 2

    header = separator.join(fmt.title for fmt in format)

    def __str__(self):
        """Return this host's table row."""
        return self.separator.join(fmt(self) for fmt in self.format)

    # Column lookup by attribute name.
    format_dict = dict((fmt.attr, fmt) for fmt in format)

    def format_field(self, name):
        """Return the rendered value of column name without its padding."""
        return self.format_dict[name](self).strip()
+
class Session(dict):
    """
    Per-host statistics for one session: a dict mapping hostname to
    Host, plus the session_id it was created with.

    Sessions can be combined with "+", which yields a new Session with
    session_id None.
    """

    def __init__(self, session_id=None):
        """Create an empty session labelled session_id."""
        dict.__init__(self)
        self.session_id = session_id

    @property
    def hostnames(self):
        """The set of hostnames present in this session."""
        return set(self)

    def get_plot_row(self, name, hostnames):
        """
        Return (session_id, value, value, ...) with one rendered value
        of field name per entry of hostnames, in the same order.  Hosts
        missing from this session contribute "".
        """
        cells = tuple(self[h].format_field(name) if h in self else ""
                      for h in hostnames)
        return (self.session_id,) + cells

    def __add__(self, other):
        """
        Return a new Session covering the hosts of both operands.

        Hosts present on both sides are combined with "+"; hosts
        present on one side only are carried over as the same object.
        """
        result = self.__class__()
        for source in (self, other):
            for hostname, host in source.items():
                if hostname in result:
                    result[hostname] = result[hostname] + host
                else:
                    result[hostname] = host
        return result

    def add_rsync_history(self, h):
        """Add history record h to the Host for h.hostname, creating it if needed."""
        if h.hostname not in self:
            self[h.hostname] = Host(h.hostname, self.session_id)
        self[h.hostname].add_rsync_history(h)

    def add_uri(self, u):
        """
        Record URI u against its host.

        URIs without a hostname, or whose host has no entry in this
        session, are ignored.
        """
        h = urlparse.urlparse(u).hostname
        if h and h in self:
            self[h].add_uri(u)

    def dump(self, title, f=None):
        """
        Write title, the column header and one row per host to f.

        f defaults to whatever sys.stdout is at call time.  Rows are
        sorted by hostname, with a None hostname sorting first.
        """
        if f is None:
            f = sys.stdout
        f.write("\n" + title + "\n" + Host.header + "\n")
        for h in sorted(self, key=lambda name: name or ""):
            f.write(str(self[h]) + "\n")
+
# Each message in this maildir is expected to carry one XML report as
# its (non-multipart) payload.
mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False)

sessions = []

for msg in mb.itervalues():

    # One dot per message as a progress indicator.
    sys.stderr.write(".")

    assert not msg.is_multipart()

    report = ElementTreeFromString(msg.get_payload())

    session = Session(report.get("date"))
    sessions.append(session)

    # Histories go in first: Session.add_uri() only counts URIs for
    # hosts that already have an entry.
    for elt in report.findall("rsync_history"):
        session.add_rsync_history(Rsync_History(elt))

    for elt in report.findall("validation_status"):
        if elt.get("generation") == "current":
            # An element without text yields "", which add_uri() ignores.
            session.add_uri((elt.text or "").strip())

sys.stderr.write("\n")

# Totals across all sessions.
summary = sum(sessions, Session())

if show_summary:
    summary.dump("Summary (%d sessions)" % len(sessions))

if show_sessions:
    for i, session in enumerate(sessions, 1):
        session.dump("Session #%d (%s)" % (i, session.session_id))
+
def plotter(plot, hostnames, field):
    """
    Write gnuplot commands and inline data for one graph of field,
    with one line per entry of hostnames.

    plot      -- object with a write() method receiving the commands
    hostnames -- sequence of hostnames to plot, in legend order
    field     -- attr name of one of the Host.format columns

    Reads the module-level sessions list and plot_to_pdf flag.  The
    x values are the session ids, which gnuplot is told to parse with
    the timefmt given below.
    """
    column = Host.format_dict[field]
    plotlines = sorted(s.get_plot_row(field, hostnames) for s in sessions)
    n = len(hostnames) + 1
    assert all(n == len(plotline) for plotline in plotlines)
    if plot_to_pdf:
        plot.write("\n set terminal pdf\n set output '%s.pdf'\n " % field)
    if "%%" in column.fmt:
        plot.write("\n set format y \"%.0f%%\"\n ")
    else:
        # All graphs share one gnuplot session, so put the y axis
        # back to its default format in case an earlier graph
        # switched it to percentages.
        plot.write("\n set format y\n ")
    plot.write("\n set xdata time"
               "\n set timefmt '%Y-%m-%dT%H:%M:%SZ'"
               "\n #set format x '%H:%M:%S'"
               "\n #set format x '%m-%d'"
               "\n set format x '%a%H'"
               "\n set title '" + column.title + "'"
               "\n plot" +
               ",".join(" '-' using 1:2 with lines title '%s'" % h for h in hostnames) +
               "\n")
    # Every '-' in the plot command consumes its own inline data block
    # terminated by "e", so the rows are written once per host column.
    for i in range(1, n):
        for plotline in plotlines:
            # Strip the "%" that percentage columns carry.
            plot.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
        plot.write("e\n")
    if not plot_to_pdf:
        plot.write("pause -1\n")
+
if show_plot:
    if plot_all_hosts:
        hostnames = tuple(sorted(summary.hostnames))
    else:
        hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net",
                     "arin.rpki.net", "rgnet.rpki.net",
                     "rpki.surfnet.nl", "rpki.antd.nist.gov")
    # Commands go either into a file for later use or straight into a
    # gnuplot child process.
    gnuplot = None
    if plot_to_file:
        plot = open("foo.plot", "w")
    else:
        gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
        plot = gnuplot.stdin
    try:
        for f in Host.format:
            # These two columns are not plotted.
            if f.attr not in ("scaled_elapsed", "hostname"):
                plotter(plot, hostnames, f.attr)
    finally:
        # Close the command stream and reap gnuplot even if plotting
        # failed part way through.
        plot.close()
        if gnuplot is not None:
            gnuplot.wait()