Diffstat (limited to 'potpourri/analyze-rcynic-history.py')
-rw-r--r--  potpourri/analyze-rcynic-history.py  428
1 file changed, 214 insertions, 214 deletions
diff --git a/potpourri/analyze-rcynic-history.py b/potpourri/analyze-rcynic-history.py
index 648538cc..c0836ab2 100644
--- a/potpourri/analyze-rcynic-history.py
+++ b/potpourri/analyze-rcynic-history.py
@@ -1,11 +1,11 @@
# $Id$
-#
+#
# Copyright (C) 2011-2012 Internet Systems Consortium ("ISC")
-#
+#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
-#
+#
# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
@@ -35,201 +35,201 @@ from xml.etree.cElementTree import (ElementTree as ElementTree,
fromstring as ElementTreeFromString)
def parse_utc(s):
- return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ")
+ return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ")
class Rsync_History(object):
- """
- An Rsync_History object represents one rsync connection.
- """
+ """
+ An Rsync_History object represents one rsync connection.
+ """
- def __init__(self, elt):
- self.error = elt.get("error")
- self.uri = elt.text.strip()
- self.hostname = urlparse.urlparse(self.uri).hostname or None
- self.elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started"))
+ def __init__(self, elt):
+ self.error = elt.get("error")
+ self.uri = elt.text.strip()
+ self.hostname = urlparse.urlparse(self.uri).hostname or None
+ self.elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started"))
class Host(object):
- """
- A host object represents all the data collected for one host. Note
- that it (usually) contains a list of all the sessions in which this
- host appears.
- """
-
- def __init__(self, hostname, session_id):
- self.hostname = hostname
- self.session_id = session_id
- self.elapsed = datetime.timedelta(0)
- self.connection_count = 0
- self.dead_connections = 0
- self.uris = set()
- self.total_connection_time = datetime.timedelta(0)
-
- def add_rsync_history(self, h):
- self.connection_count += 1
- self.elapsed += h.elapsed
- self.dead_connections += int(h.error is not None)
- self.total_connection_time += h.elapsed
-
- def add_uri(self, u):
- self.uris.add(u)
-
- def finalize(self):
- self.object_count = len(self.uris)
- del self.uris
-
- @property
- def failed(self):
- return 1 if self.dead_connections else 0
-
- @property
- def seconds_per_object(self):
- if self.failed:
- return None
- else:
- return float(self.elapsed.days * 24 * 60 * 60 +
- self.elapsed.seconds +
- self.elapsed.microseconds / 10**6) / float(self.object_count)
-
- @property
- def objects_per_connection(self):
- if self.failed:
- return None
- else:
- return float(self.object_count) / float(self.connection_count)
-
- @property
- def average_connection_time(self):
- return float(self.total_connection_time.days * 24 * 60 * 60 +
- self.total_connection_time.seconds +
- self.total_connection_time.microseconds / 10**6) / float(self.connection_count)
-
- class Format(object):
-
- def __init__(self, attr, title, fmt, ylabel = ""):
- self.attr = attr
- self.title = title
- self.width = len(title) - int("%" in fmt)
- self.fmt = "%%%d%s" % (self.width, fmt)
- self.oops = "*" * self.width
- self.ylabel = ylabel
-
- def __call__(self, obj):
- try:
- value = getattr(obj, self.attr)
- return None if value is None else self.fmt % value
- except ZeroDivisionError:
- return self.oops
-
- format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"),
- Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"),
- Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"),
- Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"),
- Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours),
- Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"),
- Format("hostname", "Hostname", "s"))
-
- format_dict = dict((fmt.attr, fmt) for fmt in format)
-
- def format_field(self, name):
- result = self.format_dict[name](self)
- return None if result is None else result.strip()
+ """
+ A host object represents all the data collected for one host. Note
+ that it (usually) contains a list of all the sessions in which this
+ host appears.
+ """
+
+ def __init__(self, hostname, session_id):
+ self.hostname = hostname
+ self.session_id = session_id
+ self.elapsed = datetime.timedelta(0)
+ self.connection_count = 0
+ self.dead_connections = 0
+ self.uris = set()
+ self.total_connection_time = datetime.timedelta(0)
+
+ def add_rsync_history(self, h):
+ self.connection_count += 1
+ self.elapsed += h.elapsed
+ self.dead_connections += int(h.error is not None)
+ self.total_connection_time += h.elapsed
+
+ def add_uri(self, u):
+ self.uris.add(u)
+
+ def finalize(self):
+ self.object_count = len(self.uris)
+ del self.uris
+
+ @property
+ def failed(self):
+ return 1 if self.dead_connections else 0
+
+ @property
+ def seconds_per_object(self):
+ if self.failed:
+ return None
+ else:
+ return float(self.elapsed.days * 24 * 60 * 60 +
+ self.elapsed.seconds +
+ self.elapsed.microseconds / 10**6) / float(self.object_count)
+
+ @property
+ def objects_per_connection(self):
+ if self.failed:
+ return None
+ else:
+ return float(self.object_count) / float(self.connection_count)
+
+ @property
+ def average_connection_time(self):
+ return float(self.total_connection_time.days * 24 * 60 * 60 +
+ self.total_connection_time.seconds +
+ self.total_connection_time.microseconds / 10**6) / float(self.connection_count)
+
+ class Format(object):
+
+ def __init__(self, attr, title, fmt, ylabel = ""):
+ self.attr = attr
+ self.title = title
+ self.width = len(title) - int("%" in fmt)
+ self.fmt = "%%%d%s" % (self.width, fmt)
+ self.oops = "*" * self.width
+ self.ylabel = ylabel
+
+ def __call__(self, obj):
+ try:
+ value = getattr(obj, self.attr)
+ return None if value is None else self.fmt % value
+ except ZeroDivisionError:
+ return self.oops
+
+ format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"),
+ Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"),
+ Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"),
+ Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"),
+ Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours),
+ Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"),
+ Format("hostname", "Hostname", "s"))
+
+ format_dict = dict((fmt.attr, fmt) for fmt in format)
+
+ def format_field(self, name):
+ result = self.format_dict[name](self)
+ return None if result is None else result.strip()
class Session(dict):
- """
- A session corresponds to one XML file. This is a dictionary of Host
- objects, keyed by hostname.
- """
-
- def __init__(self, session_id, msg_key):
- self.session_id = session_id
- self.msg_key = msg_key
- self.date = parse_utc(session_id)
- self.calculated_failure_history = False
-
- @property
- def hostnames(self):
- return set(self.iterkeys())
-
- def get_plot_row(self, name, hostnames):
- return (self.session_id,) + tuple(self[h].format_field(name) if h in self else "" for h in hostnames)
-
- def add_rsync_history(self, h):
- if h.hostname not in self:
- self[h.hostname] = Host(h.hostname, self.session_id)
- self[h.hostname].add_rsync_history(h)
-
- def add_uri(self, u):
- h = urlparse.urlparse(u).hostname
- if h and h in self:
- self[h].add_uri(u)
-
- def finalize(self):
- for h in self.itervalues():
- h.finalize()
-
- def calculate_failure_history(self, sessions):
- start = self.date - datetime.timedelta(hours = window_hours)
- sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start)
- for hostname, h in self.iteritems():
- i = n = 0
- for s in sessions:
- if hostname in s:
- i += s[hostname].failed
- n += 1
- h.failure_rate_running = float(100 * i) / n
- self.calculated_failure_history = True
+ """
+ A session corresponds to one XML file. This is a dictionary of Host
+ objects, keyed by hostname.
+ """
+
+ def __init__(self, session_id, msg_key):
+ self.session_id = session_id
+ self.msg_key = msg_key
+ self.date = parse_utc(session_id)
+ self.calculated_failure_history = False
+
+ @property
+ def hostnames(self):
+ return set(self.iterkeys())
+
+ def get_plot_row(self, name, hostnames):
+ return (self.session_id,) + tuple(self[h].format_field(name) if h in self else "" for h in hostnames)
+
+ def add_rsync_history(self, h):
+ if h.hostname not in self:
+ self[h.hostname] = Host(h.hostname, self.session_id)
+ self[h.hostname].add_rsync_history(h)
+
+ def add_uri(self, u):
+ h = urlparse.urlparse(u).hostname
+ if h and h in self:
+ self[h].add_uri(u)
+
+ def finalize(self):
+ for h in self.itervalues():
+ h.finalize()
+
+ def calculate_failure_history(self, sessions):
+ start = self.date - datetime.timedelta(hours = window_hours)
+ sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start)
+ for hostname, h in self.iteritems():
+ i = n = 0
+ for s in sessions:
+ if hostname in s:
+ i += s[hostname].failed
+ n += 1
+ h.failure_rate_running = float(100 * i) / n
+ self.calculated_failure_history = True
def plotter(f, hostnames, field, logscale = False):
- plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions)
- title = Host.format_dict[field].title
- ylabel = Host.format_dict[field].ylabel
- n = len(hostnames) + 1
- assert all(n == len(plotline) for plotline in plotlines)
- if "%%" in Host.format_dict[field].fmt:
- f.write('set format y "%.0f%%"\n')
- else:
- f.write('set format y\n')
- if logscale:
- f.write("set logscale y\n")
- else:
- f.write("unset logscale y\n")
- f.write("""
- set xdata time
- set timefmt '%Y-%m-%dT%H:%M:%SZ'
- #set format x '%m/%d'
- #set format x '%b%d'
- #set format x '%Y-%m-%d'
- set format x '%Y-%m'
- #set title '""" + title + """'
- set ylabel '""" + ylabel + """'
- plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n")
- for i in xrange(1, n):
- for plotline in plotlines:
- if plotline[i] is not None:
- f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
- f.write("e\n")
+ plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions)
+ title = Host.format_dict[field].title
+ ylabel = Host.format_dict[field].ylabel
+ n = len(hostnames) + 1
+ assert all(n == len(plotline) for plotline in plotlines)
+ if "%%" in Host.format_dict[field].fmt:
+ f.write('set format y "%.0f%%"\n')
+ else:
+ f.write('set format y\n')
+ if logscale:
+ f.write("set logscale y\n")
+ else:
+ f.write("unset logscale y\n")
+ f.write("""
+ set xdata time
+ set timefmt '%Y-%m-%dT%H:%M:%SZ'
+ #set format x '%m/%d'
+ #set format x '%b%d'
+ #set format x '%Y-%m-%d'
+ set format x '%Y-%m'
+ #set title '""" + title + """'
+ set ylabel '""" + ylabel + """'
+ plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n")
+ for i in xrange(1, n):
+ for plotline in plotlines:
+ if plotline[i] is not None:
+ f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
+ f.write("e\n")
def plot_hosts(hostnames, fields):
- for field in fields:
- for logscale in (False, True):
- gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
- gnuplot.stdin.write("set terminal pdf\n")
- gnuplot.stdin.write("set output '%s/%s-%s.pdf'\n" % (outdir, field, "log" if logscale else "linear"))
- plotter(gnuplot.stdin, hostnames, field, logscale = logscale)
- gnuplot.stdin.close()
- gnuplot.wait()
+ for field in fields:
+ for logscale in (False, True):
+ gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
+ gnuplot.stdin.write("set terminal pdf\n")
+ gnuplot.stdin.write("set output '%s/%s-%s.pdf'\n" % (outdir, field, "log" if logscale else "linear"))
+ plotter(gnuplot.stdin, hostnames, field, logscale = logscale)
+ gnuplot.stdin.close()
+ gnuplot.wait()
outdir = "images"
if not os.path.exists(outdir):
- os.makedirs(outdir)
+ os.makedirs(outdir)
mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False)
if sys.platform == "darwin": # Sigh
- shelf = shelve.open("rcynic-xml", "c")
+ shelf = shelve.open("rcynic-xml", "c")
else:
- shelf = shelve.open("rcynic-xml.db", "c")
+ shelf = shelve.open("rcynic-xml.db", "c")
sessions = []
@@ -237,55 +237,55 @@ latest = None
parsed = 0
for i, key in enumerate(mb.iterkeys(), 1):
- sys.stderr.write("\r%s %d/%d/%d..." % ("|\\-/"[i & 3], parsed, i, len(mb)))
-
- if key in shelf:
- session = shelf[key]
-
- else:
- sys.stderr.write("%s..." % key)
- assert not mb[key].is_multipart()
- input = ElementTreeFromString(mb[key].get_payload())
- date = input.get("date")
- sys.stderr.write("%s..." % date)
- session = Session(date, key)
- for elt in input.findall("rsync_history"):
- session.add_rsync_history(Rsync_History(elt))
- for elt in input.findall("validation_status"):
- if elt.get("generation") == "current":
- session.add_uri(elt.text.strip())
- session.finalize()
- shelf[key] = session
- parsed += 1
-
- sessions.append(session)
- if latest is None or session.session_id > latest.session_id:
- latest = session
+ sys.stderr.write("\r%s %d/%d/%d..." % ("|\\-/"[i & 3], parsed, i, len(mb)))
+
+ if key in shelf:
+ session = shelf[key]
+
+ else:
+ sys.stderr.write("%s..." % key)
+ assert not mb[key].is_multipart()
+ input = ElementTreeFromString(mb[key].get_payload())
+ date = input.get("date")
+ sys.stderr.write("%s..." % date)
+ session = Session(date, key)
+ for elt in input.findall("rsync_history"):
+ session.add_rsync_history(Rsync_History(elt))
+ for elt in input.findall("validation_status"):
+ if elt.get("generation") == "current":
+ session.add_uri(elt.text.strip())
+ session.finalize()
+ shelf[key] = session
+ parsed += 1
+
+ sessions.append(session)
+ if latest is None or session.session_id > latest.session_id:
+ latest = session
sys.stderr.write("\n")
shelf.sync()
for session in sessions:
- if not getattr(session, "calculated_failure_history", False):
- session.calculate_failure_history(sessions)
- shelf[session.msg_key] = session
+ if not getattr(session, "calculated_failure_history", False):
+ session.calculate_failure_history(sessions)
+ shelf[session.msg_key] = session
if plot_all_hosts:
- hostnames = sorted(reduce(lambda x, y: x | y,
- (s.hostnames for s in sessions),
- set()))
+ hostnames = sorted(reduce(lambda x, y: x | y,
+ (s.hostnames for s in sessions),
+ set()))
else:
- hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", "rpki.afrinic.net", "rpki.arin.net",
- #"localcert.ripe.net", "arin.rpki.net", "repo0.rpki.net", "rgnet.rpki.net",
- "ca0.rpki.net")
+ hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", "rpki.afrinic.net", "rpki.arin.net",
+ #"localcert.ripe.net", "arin.rpki.net", "repo0.rpki.net", "rgnet.rpki.net",
+ "ca0.rpki.net")
plot_hosts(hostnames, [fmt.attr for fmt in Host.format if fmt.attr != "hostname"])
if latest is not None:
- f = open("rcynic.xml", "wb")
- f.write(mb[latest.msg_key].get_payload())
- f.close()
+ f = open("rcynic.xml", "wb")
+ f.write(mb[latest.msg_key].get_payload())
+ f.close()
shelf.close()
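
The least obvious logic in this file is Session.calculate_failure_history(): for each host, it looks back over every session whose timestamp falls within the trailing window_hours window and records the percentage of those sessions in which the host had at least one failed rsync connection. Below is a minimal, self-contained sketch of that rolling-window calculation (written for Python 3; the MiniSession class, failure_rate() helper, and sample data are illustrative assumptions, not part of the script, which builds its Session objects from rcynic XML output):

    # Hedged sketch: standalone illustration of the rolling failure-rate
    # computation done by Session.calculate_failure_history(). All names and
    # data here are hypothetical; only the windowing logic mirrors the script.

    import datetime

    window_hours = 72   # assumed window size; the real value is set elsewhere

    class MiniSession(object):
        def __init__(self, date, failed_hosts, seen_hosts):
            self.date = date                        # session timestamp (datetime)
            self.failed_hosts = set(failed_hosts)   # hosts with >= 1 dead connection
            self.seen_hosts = set(seen_hosts)       # all hosts contacted this session

    def failure_rate(sessions, current, hostname):
        """Percentage of sessions within the trailing window_hours window,
        up to and including `current`, in which `hostname` appeared and had
        at least one failed connection.  Returns None if never seen (the
        original code would instead divide by zero in that case)."""
        start = current.date - datetime.timedelta(hours=window_hours)
        in_window = [s for s in sessions if start < s.date <= current.date]
        seen = [s for s in in_window if hostname in s.seen_hosts]
        if not seen:
            return None
        failed = sum(1 for s in seen if hostname in s.failed_hosts)
        return 100.0 * failed / len(seen)

    # Example: three sessions inside the window, one of which saw a failure.
    now = datetime.datetime(2012, 6, 1, 12, 0, 0)
    sessions = [
        MiniSession(now - datetime.timedelta(hours=2), {"rpki.example.net"}, {"rpki.example.net"}),
        MiniSession(now - datetime.timedelta(hours=1), set(), {"rpki.example.net"}),
        MiniSession(now, set(), {"rpki.example.net"}),
    ]
    print(failure_rate(sessions, sessions[-1], "rpki.example.net"))  # 33.33...

This is the quantity the script plots as "Failure Rate": a per-host running percentage over the window, not a per-connection error rate.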