aboutsummaryrefslogtreecommitdiff
path: root/potpourri/analyze-rcynic-history.py
diff options
context:
space:
mode:
authorRob Austein <sra@hactrn.net>2015-10-26 06:29:00 +0000
committerRob Austein <sra@hactrn.net>2015-10-26 06:29:00 +0000
commitb46deb1417dc3596e9ac9fe2fe8cc0b7f42457e7 (patch)
treeca0dc0276d1adc168bc3337ce0564c4ec4957c1b /potpourri/analyze-rcynic-history.py
parent397beaf6d9900dc3b3cb612c89ebf1d57b1d16f6 (diff)
"Any programmer who fails to comply with the standard naming, formatting,
or commenting conventions should be shot. If it so happens that it is inconvenient to shoot him, then he is to be politely requested to recode his program in adherence to the above standard." -- Michael Spier, Digital Equipment Corporation svn path=/branches/tk705/; revision=6152
Diffstat (limited to 'potpourri/analyze-rcynic-history.py')
-rw-r--r--potpourri/analyze-rcynic-history.py428
1 files changed, 214 insertions, 214 deletions
diff --git a/potpourri/analyze-rcynic-history.py b/potpourri/analyze-rcynic-history.py
index 648538cc..c0836ab2 100644
--- a/potpourri/analyze-rcynic-history.py
+++ b/potpourri/analyze-rcynic-history.py
@@ -1,11 +1,11 @@
# $Id$
-#
+#
# Copyright (C) 2011-2012 Internet Systems Consortium ("ISC")
-#
+#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
-#
+#
# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
@@ -35,201 +35,201 @@ from xml.etree.cElementTree import (ElementTree as ElementTree,
fromstring as ElementTreeFromString)
def parse_utc(s):
- return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ")
+ return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ")
class Rsync_History(object):
- """
- An Rsync_History object represents one rsync connection.
- """
+ """
+ An Rsync_History object represents one rsync connection.
+ """
- def __init__(self, elt):
- self.error = elt.get("error")
- self.uri = elt.text.strip()
- self.hostname = urlparse.urlparse(self.uri).hostname or None
- self.elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started"))
+ def __init__(self, elt):
+ self.error = elt.get("error")
+ self.uri = elt.text.strip()
+ self.hostname = urlparse.urlparse(self.uri).hostname or None
+ self.elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started"))
class Host(object):
- """
- A host object represents all the data collected for one host. Note
- that it (usually) contains a list of all the sessions in which this
- host appears.
- """
-
- def __init__(self, hostname, session_id):
- self.hostname = hostname
- self.session_id = session_id
- self.elapsed = datetime.timedelta(0)
- self.connection_count = 0
- self.dead_connections = 0
- self.uris = set()
- self.total_connection_time = datetime.timedelta(0)
-
- def add_rsync_history(self, h):
- self.connection_count += 1
- self.elapsed += h.elapsed
- self.dead_connections += int(h.error is not None)
- self.total_connection_time += h.elapsed
-
- def add_uri(self, u):
- self.uris.add(u)
-
- def finalize(self):
- self.object_count = len(self.uris)
- del self.uris
-
- @property
- def failed(self):
- return 1 if self.dead_connections else 0
-
- @property
- def seconds_per_object(self):
- if self.failed:
- return None
- else:
- return float(self.elapsed.days * 24 * 60 * 60 +
- self.elapsed.seconds +
- self.elapsed.microseconds / 10**6) / float(self.object_count)
-
- @property
- def objects_per_connection(self):
- if self.failed:
- return None
- else:
- return float(self.object_count) / float(self.connection_count)
-
- @property
- def average_connection_time(self):
- return float(self.total_connection_time.days * 24 * 60 * 60 +
- self.total_connection_time.seconds +
- self.total_connection_time.microseconds / 10**6) / float(self.connection_count)
-
- class Format(object):
-
- def __init__(self, attr, title, fmt, ylabel = ""):
- self.attr = attr
- self.title = title
- self.width = len(title) - int("%" in fmt)
- self.fmt = "%%%d%s" % (self.width, fmt)
- self.oops = "*" * self.width
- self.ylabel = ylabel
-
- def __call__(self, obj):
- try:
- value = getattr(obj, self.attr)
- return None if value is None else self.fmt % value
- except ZeroDivisionError:
- return self.oops
-
- format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"),
- Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"),
- Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"),
- Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"),
- Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours),
- Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"),
- Format("hostname", "Hostname", "s"))
-
- format_dict = dict((fmt.attr, fmt) for fmt in format)
-
- def format_field(self, name):
- result = self.format_dict[name](self)
- return None if result is None else result.strip()
+ """
+ A host object represents all the data collected for one host. Note
+ that it (usually) contains a list of all the sessions in which this
+ host appears.
+ """
+
+ def __init__(self, hostname, session_id):
+ self.hostname = hostname
+ self.session_id = session_id
+ self.elapsed = datetime.timedelta(0)
+ self.connection_count = 0
+ self.dead_connections = 0
+ self.uris = set()
+ self.total_connection_time = datetime.timedelta(0)
+
+ def add_rsync_history(self, h):
+ self.connection_count += 1
+ self.elapsed += h.elapsed
+ self.dead_connections += int(h.error is not None)
+ self.total_connection_time += h.elapsed
+
+ def add_uri(self, u):
+ self.uris.add(u)
+
+ def finalize(self):
+ self.object_count = len(self.uris)
+ del self.uris
+
+ @property
+ def failed(self):
+ return 1 if self.dead_connections else 0
+
+ @property
+ def seconds_per_object(self):
+ if self.failed:
+ return None
+ else:
+ return float(self.elapsed.days * 24 * 60 * 60 +
+ self.elapsed.seconds +
+ self.elapsed.microseconds / 10**6) / float(self.object_count)
+
+ @property
+ def objects_per_connection(self):
+ if self.failed:
+ return None
+ else:
+ return float(self.object_count) / float(self.connection_count)
+
+ @property
+ def average_connection_time(self):
+ return float(self.total_connection_time.days * 24 * 60 * 60 +
+ self.total_connection_time.seconds +
+ self.total_connection_time.microseconds / 10**6) / float(self.connection_count)
+
+ class Format(object):
+
+ def __init__(self, attr, title, fmt, ylabel = ""):
+ self.attr = attr
+ self.title = title
+ self.width = len(title) - int("%" in fmt)
+ self.fmt = "%%%d%s" % (self.width, fmt)
+ self.oops = "*" * self.width
+ self.ylabel = ylabel
+
+ def __call__(self, obj):
+ try:
+ value = getattr(obj, self.attr)
+ return None if value is None else self.fmt % value
+ except ZeroDivisionError:
+ return self.oops
+
+ format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"),
+ Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"),
+ Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"),
+ Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"),
+ Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours),
+ Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"),
+ Format("hostname", "Hostname", "s"))
+
+ format_dict = dict((fmt.attr, fmt) for fmt in format)
+
+ def format_field(self, name):
+ result = self.format_dict[name](self)
+ return None if result is None else result.strip()
class Session(dict):
- """
- A session corresponds to one XML file. This is a dictionary of Host
- objects, keyed by hostname.
- """
-
- def __init__(self, session_id, msg_key):
- self.session_id = session_id
- self.msg_key = msg_key
- self.date = parse_utc(session_id)
- self.calculated_failure_history = False
-
- @property
- def hostnames(self):
- return set(self.iterkeys())
-
- def get_plot_row(self, name, hostnames):
- return (self.session_id,) + tuple(self[h].format_field(name) if h in self else "" for h in hostnames)
-
- def add_rsync_history(self, h):
- if h.hostname not in self:
- self[h.hostname] = Host(h.hostname, self.session_id)
- self[h.hostname].add_rsync_history(h)
-
- def add_uri(self, u):
- h = urlparse.urlparse(u).hostname
- if h and h in self:
- self[h].add_uri(u)
-
- def finalize(self):
- for h in self.itervalues():
- h.finalize()
-
- def calculate_failure_history(self, sessions):
- start = self.date - datetime.timedelta(hours = window_hours)
- sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start)
- for hostname, h in self.iteritems():
- i = n = 0
- for s in sessions:
- if hostname in s:
- i += s[hostname].failed
- n += 1
- h.failure_rate_running = float(100 * i) / n
- self.calculated_failure_history = True
+ """
+ A session corresponds to one XML file. This is a dictionary of Host
+ objects, keyed by hostname.
+ """
+
+ def __init__(self, session_id, msg_key):
+ self.session_id = session_id
+ self.msg_key = msg_key
+ self.date = parse_utc(session_id)
+ self.calculated_failure_history = False
+
+ @property
+ def hostnames(self):
+ return set(self.iterkeys())
+
+ def get_plot_row(self, name, hostnames):
+ return (self.session_id,) + tuple(self[h].format_field(name) if h in self else "" for h in hostnames)
+
+ def add_rsync_history(self, h):
+ if h.hostname not in self:
+ self[h.hostname] = Host(h.hostname, self.session_id)
+ self[h.hostname].add_rsync_history(h)
+
+ def add_uri(self, u):
+ h = urlparse.urlparse(u).hostname
+ if h and h in self:
+ self[h].add_uri(u)
+
+ def finalize(self):
+ for h in self.itervalues():
+ h.finalize()
+
+ def calculate_failure_history(self, sessions):
+ start = self.date - datetime.timedelta(hours = window_hours)
+ sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start)
+ for hostname, h in self.iteritems():
+ i = n = 0
+ for s in sessions:
+ if hostname in s:
+ i += s[hostname].failed
+ n += 1
+ h.failure_rate_running = float(100 * i) / n
+ self.calculated_failure_history = True
def plotter(f, hostnames, field, logscale = False):
- plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions)
- title = Host.format_dict[field].title
- ylabel = Host.format_dict[field].ylabel
- n = len(hostnames) + 1
- assert all(n == len(plotline) for plotline in plotlines)
- if "%%" in Host.format_dict[field].fmt:
- f.write('set format y "%.0f%%"\n')
- else:
- f.write('set format y\n')
- if logscale:
- f.write("set logscale y\n")
- else:
- f.write("unset logscale y\n")
- f.write("""
- set xdata time
- set timefmt '%Y-%m-%dT%H:%M:%SZ'
- #set format x '%m/%d'
- #set format x '%b%d'
- #set format x '%Y-%m-%d'
- set format x '%Y-%m'
- #set title '""" + title + """'
- set ylabel '""" + ylabel + """'
- plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n")
- for i in xrange(1, n):
- for plotline in plotlines:
- if plotline[i] is not None:
- f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
- f.write("e\n")
+ plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions)
+ title = Host.format_dict[field].title
+ ylabel = Host.format_dict[field].ylabel
+ n = len(hostnames) + 1
+ assert all(n == len(plotline) for plotline in plotlines)
+ if "%%" in Host.format_dict[field].fmt:
+ f.write('set format y "%.0f%%"\n')
+ else:
+ f.write('set format y\n')
+ if logscale:
+ f.write("set logscale y\n")
+ else:
+ f.write("unset logscale y\n")
+ f.write("""
+ set xdata time
+ set timefmt '%Y-%m-%dT%H:%M:%SZ'
+ #set format x '%m/%d'
+ #set format x '%b%d'
+ #set format x '%Y-%m-%d'
+ set format x '%Y-%m'
+ #set title '""" + title + """'
+ set ylabel '""" + ylabel + """'
+ plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n")
+ for i in xrange(1, n):
+ for plotline in plotlines:
+ if plotline[i] is not None:
+ f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
+ f.write("e\n")
def plot_hosts(hostnames, fields):
- for field in fields:
- for logscale in (False, True):
- gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
- gnuplot.stdin.write("set terminal pdf\n")
- gnuplot.stdin.write("set output '%s/%s-%s.pdf'\n" % (outdir, field, "log" if logscale else "linear"))
- plotter(gnuplot.stdin, hostnames, field, logscale = logscale)
- gnuplot.stdin.close()
- gnuplot.wait()
+ for field in fields:
+ for logscale in (False, True):
+ gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
+ gnuplot.stdin.write("set terminal pdf\n")
+ gnuplot.stdin.write("set output '%s/%s-%s.pdf'\n" % (outdir, field, "log" if logscale else "linear"))
+ plotter(gnuplot.stdin, hostnames, field, logscale = logscale)
+ gnuplot.stdin.close()
+ gnuplot.wait()
outdir = "images"
if not os.path.exists(outdir):
- os.makedirs(outdir)
+ os.makedirs(outdir)
mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False)
if sys.platform == "darwin": # Sigh
- shelf = shelve.open("rcynic-xml", "c")
+ shelf = shelve.open("rcynic-xml", "c")
else:
- shelf = shelve.open("rcynic-xml.db", "c")
+ shelf = shelve.open("rcynic-xml.db", "c")
sessions = []
@@ -237,55 +237,55 @@ latest = None
parsed = 0
for i, key in enumerate(mb.iterkeys(), 1):
- sys.stderr.write("\r%s %d/%d/%d..." % ("|\\-/"[i & 3], parsed, i, len(mb)))
-
- if key in shelf:
- session = shelf[key]
-
- else:
- sys.stderr.write("%s..." % key)
- assert not mb[key].is_multipart()
- input = ElementTreeFromString(mb[key].get_payload())
- date = input.get("date")
- sys.stderr.write("%s..." % date)
- session = Session(date, key)
- for elt in input.findall("rsync_history"):
- session.add_rsync_history(Rsync_History(elt))
- for elt in input.findall("validation_status"):
- if elt.get("generation") == "current":
- session.add_uri(elt.text.strip())
- session.finalize()
- shelf[key] = session
- parsed += 1
-
- sessions.append(session)
- if latest is None or session.session_id > latest.session_id:
- latest = session
+ sys.stderr.write("\r%s %d/%d/%d..." % ("|\\-/"[i & 3], parsed, i, len(mb)))
+
+ if key in shelf:
+ session = shelf[key]
+
+ else:
+ sys.stderr.write("%s..." % key)
+ assert not mb[key].is_multipart()
+ input = ElementTreeFromString(mb[key].get_payload())
+ date = input.get("date")
+ sys.stderr.write("%s..." % date)
+ session = Session(date, key)
+ for elt in input.findall("rsync_history"):
+ session.add_rsync_history(Rsync_History(elt))
+ for elt in input.findall("validation_status"):
+ if elt.get("generation") == "current":
+ session.add_uri(elt.text.strip())
+ session.finalize()
+ shelf[key] = session
+ parsed += 1
+
+ sessions.append(session)
+ if latest is None or session.session_id > latest.session_id:
+ latest = session
sys.stderr.write("\n")
shelf.sync()
for session in sessions:
- if not getattr(session, "calculated_failure_history", False):
- session.calculate_failure_history(sessions)
- shelf[session.msg_key] = session
+ if not getattr(session, "calculated_failure_history", False):
+ session.calculate_failure_history(sessions)
+ shelf[session.msg_key] = session
if plot_all_hosts:
- hostnames = sorted(reduce(lambda x, y: x | y,
- (s.hostnames for s in sessions),
- set()))
+ hostnames = sorted(reduce(lambda x, y: x | y,
+ (s.hostnames for s in sessions),
+ set()))
else:
- hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", "rpki.afrinic.net", "rpki.arin.net",
- #"localcert.ripe.net", "arin.rpki.net", "repo0.rpki.net", "rgnet.rpki.net",
- "ca0.rpki.net")
+ hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", "rpki.afrinic.net", "rpki.arin.net",
+ #"localcert.ripe.net", "arin.rpki.net", "repo0.rpki.net", "rgnet.rpki.net",
+ "ca0.rpki.net")
plot_hosts(hostnames, [fmt.attr for fmt in Host.format if fmt.attr != "hostname"])
if latest is not None:
- f = open("rcynic.xml", "wb")
- f.write(mb[latest.msg_key].get_payload())
- f.close()
+ f = open("rcynic.xml", "wb")
+ f.write(mb[latest.msg_key].get_payload())
+ f.close()
shelf.close()