about summary refs log tree commit diff
path: root/scripts/analyze-rcynic-history.py
diff options
context:
space:
mode:
author Rob Austein <sra@hactrn.net> 2012-03-19 21:14:48 +0000
committer Rob Austein <sra@hactrn.net> 2012-03-19 21:14:48 +0000
commit 8e3eb0fc2b371e1eb0f93afad97ff6cd53e7d78b (patch)
tree fb5df7edce1c65230beb4770de2d0561d7387cd8 /scripts/analyze-rcynic-history.py
parent 4950a77da61c9993788c0557ef362ecdc0a1cf18 (diff)
parent d559bc6a29311fa1c414ce8cc3632f5f74ee8485 (diff)
Sync from trunk.
svn path=/branches/tk161/; revision=4403
Diffstat (limited to 'scripts/analyze-rcynic-history.py')
-rw-r--r-- scripts/analyze-rcynic-history.py 149
1 files changed, 84 insertions, 65 deletions
diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py
index 7d918198..f45a0578 100644
--- a/scripts/analyze-rcynic-history.py
+++ b/scripts/analyze-rcynic-history.py
@@ -19,12 +19,18 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
"""
-plot_all_hosts = False
-plot_to_one = True
-plot_to_many = True
-write_rcynic_xml = True
+plot_all_hosts = False
-import mailbox, sys, urlparse, os, getopt, datetime, subprocess
+window_hours = 72
+
+import mailbox
+import sys
+import urlparse
+import os
+import getopt
+import datetime
+import subprocess
+import shelve
from xml.etree.cElementTree import (ElementTree as ElementTree,
fromstring as ElementTreeFromString)
@@ -73,12 +79,14 @@ class Host(object):
del self.uris
@property
- def failure_rate_percentage(self):
- return float(self.dead_connections * 100) / float(self.connection_count)
+ def failed(self):
+ return 1 if self.dead_connections else 0
@property
def seconds_per_object(self):
- return float(self.elapsed.total_seconds()) / float(self.object_count)
+ return float(self.elapsed.days * 24 * 60 * 60 +
+ self.elapsed.seconds +
+ self.elapsed.microseconds / 10**6) / float(self.object_count)
@property
def objects_per_connection(self):
@@ -86,16 +94,19 @@ class Host(object):
@property
def average_connection_time(self):
- return float(self.total_connection_time.total_seconds()) / float(self.connection_count)
+ return float(self.total_connection_time.days * 24 * 60 * 60 +
+ self.total_connection_time.seconds +
+ self.total_connection_time.microseconds / 10**6) / float(self.connection_count)
class Format(object):
- def __init__(self, attr, title, fmt):
+ def __init__(self, attr, title, fmt, ylabel = ""):
self.attr = attr
self.title = title
self.width = len(title) - int("%" in fmt)
self.fmt = "%%%d%s" % (self.width, fmt)
self.oops = "*" * self.width
+ self.ylabel = ylabel
def __call__(self, obj):
try:
@@ -103,21 +114,14 @@ class Host(object):
except ZeroDivisionError:
return self.oops
- format = (Format("connection_count", "Connections", "d"),
- Format("object_count", "Objects", "d"),
- Format("objects_per_connection", "Objects/Connection", ".3f"),
- Format("seconds_per_object", "Seconds/Object", ".3f"),
- Format("failure_rate_percentage", "Failure Rate", ".3f%%"),
- Format("average_connection_time", "Average Connection", ".3f"),
+ format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"),
+ Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"),
+ Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"),
+ Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"),
+ Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours),
+ Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"),
Format("hostname", "Hostname", "s"))
- separator = " " * 2
-
- header = separator.join(fmt.title for fmt in format)
-
- def __str__(self):
- return self.separator.join(fmt(self) for fmt in self.format)
-
format_dict = dict((fmt.attr, fmt) for fmt in format)
def format_field(self, name):
@@ -132,6 +136,8 @@ class Session(dict):
def __init__(self, session_id, msg_key):
self.session_id = session_id
self.msg_key = msg_key
+ self.date = parse_utc(session_id)
+ self.calculated_failure_history = False
@property
def hostnames(self):
@@ -154,9 +160,22 @@ class Session(dict):
for h in self.itervalues():
h.finalize()
+ def calculate_failure_history(self, sessions):
+ start = self.date - datetime.timedelta(hours = window_hours)
+ sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start)
+ for hostname, h in self.iteritems():
+ i = n = 0
+ for s in sessions:
+ if hostname in s:
+ i += s[hostname].failed
+ n += 1
+ h.failure_rate_running = float(100 * i) / n
+ self.calculated_failure_history = True
+
def plotter(f, hostnames, field, logscale = False):
plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions)
title = Host.format_dict[field].title
+ ylabel = Host.format_dict[field].ylabel
n = len(hostnames) + 1
assert all(n == len(plotline) for plotline in plotlines)
if "%%" in Host.format_dict[field].fmt:
@@ -171,15 +190,18 @@ def plotter(f, hostnames, field, logscale = False):
set xdata time
set timefmt '%Y-%m-%dT%H:%M:%SZ'
#set format x '%m/%d'
- set format x '%b%d'
+ #set format x '%b%d'
+ #set format x '%Y-%m-%d'
+ set format x '%Y-%m'
#set title '""" + title + """'
+ set ylabel '""" + ylabel + """'
plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n")
for i in xrange(1, n):
for plotline in plotlines:
f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%")))
f.write("e\n")
-def plot_many(hostnames, fields):
+def plot_hosts(hostnames, fields):
for field in fields:
for logscale in (False, True):
gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
@@ -189,70 +211,67 @@ def plot_many(hostnames, fields):
gnuplot.stdin.close()
gnuplot.wait()
-def plot_one(hostnames, fields):
- gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE)
- gnuplot.stdin.write("set terminal pdf\n")
- gnuplot.stdin.write("set output 'analyze-rcynic-history.pdf'\n")
- for field in fields:
- if field != "hostname":
- plotter(gnuplot.stdin, hostnames, field, logscale = False)
- plotter(gnuplot.stdin, hostnames, field, logscale = True)
- gnuplot.stdin.close()
- gnuplot.wait()
-
mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False)
+if sys.platform == "darwin": # Sigh
+ shelf = shelve.open("rcynic-xml", "c")
+else:
+ shelf = shelve.open("rcynic-xml.db", "c")
+
sessions = []
latest = None
for i, key in enumerate(mb.iterkeys(), 1):
+ sys.stderr.write("\r%s Reading %d/%d..." % ("|\\-/"[i & 3], i, len(mb)))
- sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb)))
+ if key in shelf:
+ session = shelf[key]
- assert not mb[key].is_multipart()
-
- input = ElementTreeFromString(mb[key].get_payload())
-
- date = input.get("date")
-
- sys.stderr.write("%s..." % date)
+ else:
+ assert not mb[key].is_multipart()
+ input = ElementTreeFromString(mb[key].get_payload())
+ date = input.get("date")
+ sys.stderr.write("%s..." % date)
+ session = Session(date, key)
+ for elt in input.findall("rsync_history"):
+ session.add_rsync_history(Rsync_History(elt))
+ for elt in input.findall("validation_status"):
+ if elt.get("generation") == "current":
+ session.add_uri(elt.text.strip())
+ session.finalize()
+ shelf[key] = session
- session = Session(date, key)
sessions.append(session)
-
if latest is None or session.session_id > latest.session_id:
latest = session
- for elt in input.findall("rsync_history"):
- session.add_rsync_history(Rsync_History(elt))
-
- for elt in input.findall("validation_status"):
- if elt.get("generation") == "current":
- session.add_uri(elt.text.strip())
+sys.stderr.write("\n")
- session.finalize()
+shelf.sync()
+for i, session in enumerate(sessions, 1):
+ sys.stderr.write("\r%s Failure history %d/%d...%s..." % ("|\\-/"[i & 3], i, len(sessions), session.session_id))
+ if not getattr(session, "calculated_failure_history", False):
+ session.calculate_failure_history(sessions)
+ shelf[session.msg_key] = session
sys.stderr.write("\n")
if plot_all_hosts:
- hostnames = set()
- for session in sessions:
- hostnames.update(session.hostnames)
- hostnames = sorted(hostnames)
+ hostnames = sorted(reduce(lambda x, y: x | y,
+ (s.hostnames for s in sessions),
+ set()))
else:
hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net",
- "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net",
- "rpki-pilot.arin.net")
+ "rpki.afrinic.net", "rpki-pilot.arin.net",
+ "arin.rpki.net", "rgnet.rpki.net")
-fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"]
-if plot_to_one:
- plot_one(hostnames, fields)
-if plot_to_many:
- plot_many(hostnames, fields)
+plot_hosts(hostnames, [fmt.attr for fmt in Host.format if fmt.attr != "hostname"])
-if write_rcynic_xml and latest is not None:
+if latest is not None:
f = open("rcynic.xml", "wb")
f.write(mb[latest.msg_key].get_payload())
f.close()
+
+shelf.close()