From d47b19fe99f05c4bc49dbdf01ac959e6bfdcaf21 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Tue, 31 Jan 2012 02:15:12 +0000 Subject: Rewrite to avoid running out of memory. Painfully slow with three month's worth of data, but at least it now runs again. svn path=/trunk/; revision=4273 --- scripts/analyze-rcynic-history.py | 147 +++++++++++++++----------------------- 1 file changed, 57 insertions(+), 90 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 1713e7ce..e06c9c7d 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -4,7 +4,7 @@ summaries and run gnuplot to draw some pictures. $Id$ -Copyright (C) 2011 Internet Systems Consortium ("ISC") +Copyright (C) 2011-2012 Internet Systems Consortium ("ISC") Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -19,8 +19,6 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. """ -show_summary = True -show_sessions = True show_plot = True plot_all_hosts = False plot_to_one = True @@ -31,65 +29,51 @@ import mailbox, sys, urlparse, os, getopt, datetime, subprocess from xml.etree.cElementTree import (ElementTree as ElementTree, fromstring as ElementTreeFromString) -class Rsync_History(object): +def parse_utc(s): + return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ") - timestamp_format = "%Y-%m-%dT%H:%M:%SZ" +class Rsync_History(object): + """ + An Rsync_History object represents one rsync connection. + """ def __init__(self, elt): - self.started = datetime.datetime.strptime(elt.get("started"), self.timestamp_format) - self.finished = datetime.datetime.strptime(elt.get("finished"), self.timestamp_format) self.error = elt.get("error") self.uri = elt.text.strip() self.hostname = urlparse.urlparse(self.uri).hostname or None - self.elapsed = self.finished - self.started - - def __cmp__(self, other): - return (cmp(self.started, other.started) or - cmp(self.finished, other.finished) or - cmp(self.hostname, other.hostname)) + self.elapsed = parse_utc(elt.get("finished")) - parse_utc(elt.get("started")) class Host(object): + """ + A host object represents all the data collected for one host. Note + that it (usually) contains a list of all the sessions in which this + host appears. - def __init__(self, hostname, session_id = None): + This is probably keeping far too much data, and needs to be pruned + to keep memory consumption to something sane. + """ + + def __init__(self, hostname, session_id): self.hostname = hostname - self.session_ids = [] - if session_id is not None: - self.session_ids.append(session_id) + self.session_id = session_id self.elapsed = datetime.timedelta(0) self.connection_count = 0 self.dead_connections = 0 self.uris = set() - self.connections = [] - - def __add__(self, other): - assert self.hostname == other.hostname - result = self.__class__(self.hostname) - for a in ("elapsed", "connection_count", "dead_connections", "session_ids", "connections"): - setattr(result, a, getattr(self, a) + getattr(other, a)) - result.uris = self.uris | other.uris - return result + self.total_connection_time = datetime.timedelta(0) def add_rsync_history(self, h): - self.connection_count += 1 - self.elapsed += h.elapsed - self.dead_connections += int(h.error is not None) - self.connections.append(h) + self.connection_count += 1 + self.elapsed += h.elapsed + self.dead_connections += int(h.error is not None) + self.total_connection_time += h.elapsed def add_uri(self, u): self.uris.add(u) - @property - def session_id(self): - assert len(self.session_ids) == 1 - return self.session_ids[0] - - @property - def session_count(self): - return len(self.session_ids) - - @property - def object_count(self): - return len(self.uris) + def finalize(self): + self.object_count = len(self.uris) + del self.uris @property def failure_rate_percentage(self): @@ -97,28 +81,15 @@ class Host(object): @property def seconds_per_object(self): - return (float((self.elapsed.days * 24 * 3600 + self.elapsed.seconds) * 10**6 + - self.elapsed.microseconds) / - float(self.object_count * self.session_count * 10**6)) + return float(self.elapsed.total_seconds()) / float(self.object_count) @property def objects_per_connection(self): - return (float(self.object_count * self.session_count) / - float(self.connection_count)) - - @property - def scaled_connections(self): - return float(self.connection_count) / float(self.session_count) - - @property - def scaled_elapsed(self): - return self.elapsed / self.session_count + return float(self.object_count) / float(self.connection_count) @property def average_connection_time(self): - return (float(sum(((c.elapsed.days * 24 * 3600 + c.elapsed.seconds) * 10**6 + c.elapsed.microseconds) - for c in self.connections)) / - float(self.connection_count * 10**6)) + return float(self.total_connection_time.total_seconds()) / float(self.connection_count) class Format(object): @@ -135,8 +106,7 @@ class Host(object): except ZeroDivisionError: return self.oops - format = (Format("scaled_elapsed", "Rsync Time", ".10s"), - Format("scaled_connections", "Connections", "d"), + format = (Format("connection_count", "Connections", "d"), Format("object_count", "Objects", "d"), Format("objects_per_connection", "Objects/Connection", ".3f"), Format("seconds_per_object", "Seconds/Object", ".3f"), @@ -157,6 +127,15 @@ class Host(object): return self.format_dict[name](self).strip() class Session(dict): + """ + A session corresponds to one XML file. This is a dictionary of Host + objects, keyed by hostname. + + We might need some kind of .finalize() method which throws away + unnecessary data to keep memory consumption down after we've read + the whole session. Most likely this would just be a pass through to + a Host.finalize() method which would do the real work. + """ def __init__(self, session_id = None): self.session_id = session_id @@ -168,17 +147,6 @@ class Session(dict): def get_plot_row(self, name, hostnames): return (self.session_id,) + tuple(self[h].format_field(name) if h in self else "" for h in hostnames) - def __add__(self, other): - result = self.__class__() - for h in self.hostnames | other.hostnames: - if h in self and h in other: - result[h] = self[h] + other[h] - elif h in self: - result[h] = self[h] - else: - result[h] = other[h] - return result - def add_rsync_history(self, h): if h.hostname not in self: self[h.hostname] = Host(h.hostname, self.session_id) @@ -189,18 +157,17 @@ class Session(dict): if h and h in self: self[h].add_uri(u) - def dump(self, title, f = sys.stdout): - f.write("\n" + title + "\n" + Host.header + "\n") - for h in sorted(self): - f.write(str(self[h]) + "\n") + def finalize(self): + for h in self.itervalues(): + h.finalize() mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) sessions = [] -for msg in mb.itervalues(): +for i, msg in enumerate(mb.itervalues()): - sys.stderr.write(".") + sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) assert not msg.is_multipart() @@ -216,16 +183,9 @@ for msg in mb.itervalues(): if elt.get("generation") == "current": session.add_uri(elt.text.strip()) -sys.stderr.write("\n") - -summary = sum(sessions, Session()) - -if show_summary: - summary.dump("Summary (%d sessions)" % len(sessions)) + session.finalize() -if show_sessions: - for i, session in enumerate(sessions, 1): - session.dump("Session #%d (%s)" % (i, session.session_id)) +sys.stderr.write("\n") def plotter(f, hostnames, field, logscale = False): plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions) @@ -267,18 +227,25 @@ def plot_one(hostnames, fields): gnuplot.stdin.write("set terminal pdf\n") gnuplot.stdin.write("set output 'analyze-rcynic-history.pdf'\n") for field in fields: - if field not in ("scaled_elapsed", "hostname"): + if field != "hostname": plotter(gnuplot.stdin, hostnames, field, logscale = False) plotter(gnuplot.stdin, hostnames, field, logscale = True) gnuplot.stdin.close() gnuplot.wait() if show_plot: + if plot_all_hosts: - hostnames = sorted(summary.hostnames) + hostnames = set() + for session in sessions: + hostnames.update(session.hostnames) + hostnames = sorted(hostnames) + else: - hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net") - fields = [fmt.attr for fmt in Host.format if fmt.attr not in ("scaled_elapsed", "hostname")] + hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", + "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net") + + fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"] if plot_to_one: plot_one(hostnames, fields) if plot_to_many: -- cgit v1.2.3 From f60cac5473f0fb956697105373ab35625cb40919 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sat, 4 Feb 2012 19:16:45 +0000 Subject: Extract latest rcynic.xml from mailbox. svn path=/trunk/; revision=4292 --- scripts/analyze-rcynic-history.py | 116 ++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 55 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index e06c9c7d..318c513c 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -19,10 +19,10 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. """ -show_plot = True -plot_all_hosts = False -plot_to_one = True -plot_to_many = True +plot_all_hosts = False +plot_to_one = True +plot_to_many = True +write_rcynic_xml = True import mailbox, sys, urlparse, os, getopt, datetime, subprocess @@ -30,7 +30,7 @@ from xml.etree.cElementTree import (ElementTree as ElementTree, fromstring as ElementTreeFromString) def parse_utc(s): - return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ") + return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ") class Rsync_History(object): """ @@ -48,9 +48,6 @@ class Host(object): A host object represents all the data collected for one host. Note that it (usually) contains a list of all the sessions in which this host appears. - - This is probably keeping far too much data, and needs to be pruned - to keep memory consumption to something sane. """ def __init__(self, hostname, session_id): @@ -130,15 +127,11 @@ class Session(dict): """ A session corresponds to one XML file. This is a dictionary of Host objects, keyed by hostname. - - We might need some kind of .finalize() method which throws away - unnecessary data to keep memory consumption down after we've read - the whole session. Most likely this would just be a pass through to - a Host.finalize() method which would do the real work. """ - def __init__(self, session_id = None): + def __init__(self, session_id, msg_key): self.session_id = session_id + self.msg_key = msg_key @property def hostnames(self): @@ -161,32 +154,6 @@ class Session(dict): for h in self.itervalues(): h.finalize() -mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) - -sessions = [] - -for i, msg in enumerate(mb.itervalues()): - - sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) - - assert not msg.is_multipart() - - input = ElementTreeFromString(msg.get_payload()) - - session = Session(input.get("date")) - sessions.append(session) - - for elt in input.findall("rsync_history"): - session.add_rsync_history(Rsync_History(elt)) - - for elt in input.findall("validation_status"): - if elt.get("generation") == "current": - session.add_uri(elt.text.strip()) - - session.finalize() - -sys.stderr.write("\n") - def plotter(f, hostnames, field, logscale = False): plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions) title = Host.format_dict[field].title @@ -233,20 +200,59 @@ def plot_one(hostnames, fields): gnuplot.stdin.close() gnuplot.wait() -if show_plot: +mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) - if plot_all_hosts: - hostnames = set() - for session in sessions: - hostnames.update(session.hostnames) - hostnames = sorted(hostnames) +sessions = [] - else: - hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", - "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net") - - fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"] - if plot_to_one: - plot_one(hostnames, fields) - if plot_to_many: - plot_many(hostnames, fields) +latest = None + +for i, key in enumerate(mb.iterkeys(), 1): + + sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) + + assert not mb[key].is_multipart() + + input = ElementTreeFromString(mb[key].get_payload()) + + date = input.get("date") + + sys.stderr.write("%s..." % date) + + session = Session(date, key) + sessions.append(session) + + if latest is None or session.session_id > latest.session_id: + latest = session + + for elt in input.findall("rsync_history"): + session.add_rsync_history(Rsync_History(elt)) + + for elt in input.findall("validation_status"): + if elt.get("generation") == "current": + session.add_uri(elt.text.strip()) + + session.finalize() + +sys.stderr.write("\n") + +if plot_all_hosts: + hostnames = set() + for session in sessions: + hostnames.update(session.hostnames) + hostnames = sorted(hostnames) + +else: + hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", + "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net", + "rpki-pilot.arin.net") + +fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"] +if plot_to_one: + plot_one(hostnames, fields) +if plot_to_many: + plot_many(hostnames, fields) + +if write_rcynic_xml and latest is not None: + f = open("rcynic.xml", "wb") + f.write(mb[latest.msg_key].get_payload()) + f.close() -- cgit v1.2.3 From 6c57b433896ce53b9875d1a834d938b6bd9e59d8 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sat, 4 Feb 2012 21:13:50 +0000 Subject: Change line style to linespoints. svn path=/trunk/; revision=4293 --- scripts/analyze-rcynic-history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 318c513c..7d918198 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -173,7 +173,7 @@ def plotter(f, hostnames, field, logscale = False): #set format x '%m/%d' set format x '%b%d' #set title '""" + title + """' - plot""" + ",".join(" '-' using 1:2 with lines title '%s'" % h for h in hostnames) + "\n") + plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n") for i in xrange(1, n): for plotline in plotlines: f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%"))) -- cgit v1.2.3 From 0eecc7942adf27d15c3fc86ec51affed180abc7e Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Fri, 9 Mar 2012 03:45:42 +0000 Subject: Use Python "shelve" module to avoid processing XML we've already seen in a previous run, which speeds this tediously slow script up by something close to an order of magnitude. svn path=/trunk/; revision=4390 --- scripts/analyze-rcynic-history.py | 55 +++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 7d918198..6e8d3598 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -24,7 +24,15 @@ plot_to_one = True plot_to_many = True write_rcynic_xml = True -import mailbox, sys, urlparse, os, getopt, datetime, subprocess +import mailbox +import sys +import urlparse +import os +import getopt +import datetime +import subprocess +import shelve +import whichdb from xml.etree.cElementTree import (ElementTree as ElementTree, fromstring as ElementTreeFromString) @@ -202,39 +210,46 @@ def plot_one(hostnames, fields): mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) +gdbm_file = "rcynic-xml.gdbm" + +# Disgusting workaround for dumb bug, see http://bugs.python.org/issue13007 +if whichdb.whichdb(gdbm_file) == "": + whichdb.whichdb = lambda filename: "gdbm" + +shelf = shelve.open(gdbm_file) + sessions = [] latest = None for i, key in enumerate(mb.iterkeys(), 1): - sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) - assert not mb[key].is_multipart() + if key in shelf: + session = shelf[key] - input = ElementTreeFromString(mb[key].get_payload()) - - date = input.get("date") - - sys.stderr.write("%s..." % date) + else: + assert not mb[key].is_multipart() + input = ElementTreeFromString(mb[key].get_payload()) + date = input.get("date") + sys.stderr.write("%s..." % date) + session = Session(date, key) + for elt in input.findall("rsync_history"): + session.add_rsync_history(Rsync_History(elt)) + for elt in input.findall("validation_status"): + if elt.get("generation") == "current": + session.add_uri(elt.text.strip()) + session.finalize() + shelf[key] = session - session = Session(date, key) sessions.append(session) - if latest is None or session.session_id > latest.session_id: latest = session - for elt in input.findall("rsync_history"): - session.add_rsync_history(Rsync_History(elt)) - - for elt in input.findall("validation_status"): - if elt.get("generation") == "current": - session.add_uri(elt.text.strip()) - - session.finalize() - sys.stderr.write("\n") +shelf.sync() + if plot_all_hosts: hostnames = set() for session in sessions: @@ -256,3 +271,5 @@ if write_rcynic_xml and latest is not None: f = open("rcynic.xml", "wb") f.write(mb[latest.msg_key].get_payload()) f.close() + +shelf.close() -- cgit v1.2.3 From 59b210430f9f3a8461cc8122bd21d35d5691a9a7 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sun, 11 Mar 2012 02:53:15 +0000 Subject: Use gdbm module directly instead of whacky anydb module. svn path=/trunk/; revision=4394 --- scripts/analyze-rcynic-history.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 6e8d3598..fd254d2f 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -32,7 +32,7 @@ import getopt import datetime import subprocess import shelve -import whichdb +import gdbm from xml.etree.cElementTree import (ElementTree as ElementTree, fromstring as ElementTreeFromString) @@ -210,13 +210,7 @@ def plot_one(hostnames, fields): mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) -gdbm_file = "rcynic-xml.gdbm" - -# Disgusting workaround for dumb bug, see http://bugs.python.org/issue13007 -if whichdb.whichdb(gdbm_file) == "": - whichdb.whichdb = lambda filename: "gdbm" - -shelf = shelve.open(gdbm_file) +shelf = shelve.Shelf(gdbm.open("rcynic-xml.gdbm", "c")) sessions = [] -- cgit v1.2.3 From 13bc9909d5ed2f246c0e6eb21f69e205f609e64e Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 15 Mar 2012 04:12:39 +0000 Subject: Add ylabels; cleanup. svn path=/trunk/; revision=4397 --- scripts/analyze-rcynic-history.py | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index fd254d2f..2b8da14c 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -20,7 +20,7 @@ PERFORMANCE OF THIS SOFTWARE. """ plot_all_hosts = False -plot_to_one = True +plot_to_one = False plot_to_many = True write_rcynic_xml = True @@ -98,12 +98,13 @@ class Host(object): class Format(object): - def __init__(self, attr, title, fmt): + def __init__(self, attr, title, fmt, ylabel = ""): self.attr = attr self.title = title self.width = len(title) - int("%" in fmt) self.fmt = "%%%d%s" % (self.width, fmt) self.oops = "*" * self.width + self.ylabel = ylabel def __call__(self, obj): try: @@ -111,21 +112,14 @@ class Host(object): except ZeroDivisionError: return self.oops - format = (Format("connection_count", "Connections", "d"), - Format("object_count", "Objects", "d"), - Format("objects_per_connection", "Objects/Connection", ".3f"), - Format("seconds_per_object", "Seconds/Object", ".3f"), - Format("failure_rate_percentage", "Failure Rate", ".3f%%"), - Format("average_connection_time", "Average Connection", ".3f"), + format = (Format("connection_count", "Connections", "d", "Connections To Repository (Per Session)"), + Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"), + Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"), + Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"), + Format("failure_rate_percentage", "Failure Rate", ".3f%%", "Connection Failures / Connections (Per Session)"), + Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"), Format("hostname", "Hostname", "s")) - separator = " " * 2 - - header = separator.join(fmt.title for fmt in format) - - def __str__(self): - return self.separator.join(fmt(self) for fmt in self.format) - format_dict = dict((fmt.attr, fmt) for fmt in format) def format_field(self, name): @@ -165,6 +159,7 @@ class Session(dict): def plotter(f, hostnames, field, logscale = False): plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions) title = Host.format_dict[field].title + ylabel = Host.format_dict[field].ylabel n = len(hostnames) + 1 assert all(n == len(plotline) for plotline in plotlines) if "%%" in Host.format_dict[field].fmt: @@ -181,6 +176,7 @@ def plotter(f, hostnames, field, logscale = False): #set format x '%m/%d' set format x '%b%d' #set title '""" + title + """' + set ylabel '""" + ylabel + """' plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n") for i in xrange(1, n): for plotline in plotlines: @@ -245,15 +241,14 @@ sys.stderr.write("\n") shelf.sync() if plot_all_hosts: - hostnames = set() - for session in sessions: - hostnames.update(session.hostnames) - hostnames = sorted(hostnames) + hostnames = sorted(reduce(lambda x, y: x | y, + (s.hostnames for s in sessions), + set())) else: hostnames = ("rpki.apnic.net", "rpki.ripe.net", "repository.lacnic.net", - "rpki.afrinic.net", "arin.rpki.net", "rgnet.rpki.net", - "rpki-pilot.arin.net") + "rpki.afrinic.net", "rpki-pilot.arin.net", + "arin.rpki.net", "rgnet.rpki.net") fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"] if plot_to_one: -- cgit v1.2.3 From 53b1ea1ae416245aa69a0ca5e61a69ed7c7d0946 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 15 Mar 2012 05:16:17 +0000 Subject: No gdbm on MacOSX. svn path=/trunk/; revision=4398 --- scripts/analyze-rcynic-history.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 2b8da14c..88a55cf9 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -32,7 +32,6 @@ import getopt import datetime import subprocess import shelve -import gdbm from xml.etree.cElementTree import (ElementTree as ElementTree, fromstring as ElementTreeFromString) @@ -206,7 +205,10 @@ def plot_one(hostnames, fields): mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) -shelf = shelve.Shelf(gdbm.open("rcynic-xml.gdbm", "c")) +if sys.platform == "darwin": # Sigh + shelf = shelve.open("rcynic-xml", "c") +else: + shelf = shelve.open("rcynic-xml.db", "c") sessions = [] -- cgit v1.2.3 From 66025dd6240f3d1ed55eb9aac9c9c2b5ba829ca4 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 15 Mar 2012 12:29:42 +0000 Subject: Sigh, make work with Python 2.6 again. svn path=/trunk/; revision=4399 --- scripts/analyze-rcynic-history.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index 88a55cf9..fdf850cc 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -85,7 +85,9 @@ class Host(object): @property def seconds_per_object(self): - return float(self.elapsed.total_seconds()) / float(self.object_count) + return float(self.elapsed.days * 24 * 60 * 60 + + self.elapsed.seconds + + self.elapsed.microseconds / 10**6) / float(self.object_count) @property def objects_per_connection(self): @@ -93,7 +95,9 @@ class Host(object): @property def average_connection_time(self): - return float(self.total_connection_time.total_seconds()) / float(self.connection_count) + return float(self.total_connection_time.days * 24 * 60 * 60 + + self.total_connection_time.seconds + + self.total_connection_time.microseconds / 10**6) / float(self.connection_count) class Format(object): -- cgit v1.2.3 From d559bc6a29311fa1c414ce8cc3632f5f74ee8485 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Fri, 16 Mar 2012 06:21:18 +0000 Subject: Rework failure rate computation to use rolling 72 hour window, as previous metric was incomprehensible. svn path=/trunk/; revision=4401 --- scripts/analyze-rcynic-history.py | 61 ++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'scripts/analyze-rcynic-history.py') diff --git a/scripts/analyze-rcynic-history.py b/scripts/analyze-rcynic-history.py index fdf850cc..f45a0578 100644 --- a/scripts/analyze-rcynic-history.py +++ b/scripts/analyze-rcynic-history.py @@ -19,10 +19,9 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. """ -plot_all_hosts = False -plot_to_one = False -plot_to_many = True -write_rcynic_xml = True +plot_all_hosts = False + +window_hours = 72 import mailbox import sys @@ -80,8 +79,8 @@ class Host(object): del self.uris @property - def failure_rate_percentage(self): - return float(self.dead_connections * 100) / float(self.connection_count) + def failed(self): + return 1 if self.dead_connections else 0 @property def seconds_per_object(self): @@ -119,7 +118,7 @@ class Host(object): Format("object_count", "Objects", "d", "Objects In Repository (Distinct URIs Per Session)"), Format("objects_per_connection", "Objects/Connection", ".3f", "Objects In Repository / Connections To Repository"), Format("seconds_per_object", "Seconds/Object", ".3f", "Seconds To Transfer / Object (Average Per Session)"), - Format("failure_rate_percentage", "Failure Rate", ".3f%%", "Connection Failures / Connections (Per Session)"), + Format("failure_rate_running", "Failure Rate", ".3f%%", "Sessions With Failed Connections Within Last %d Hours" % window_hours), Format("average_connection_time", "Average Connection", ".3f", "Seconds / Connection (Average Per Session)"), Format("hostname", "Hostname", "s")) @@ -137,6 +136,8 @@ class Session(dict): def __init__(self, session_id, msg_key): self.session_id = session_id self.msg_key = msg_key + self.date = parse_utc(session_id) + self.calculated_failure_history = False @property def hostnames(self): @@ -159,6 +160,18 @@ class Session(dict): for h in self.itervalues(): h.finalize() + def calculate_failure_history(self, sessions): + start = self.date - datetime.timedelta(hours = window_hours) + sessions = tuple(s for s in sessions if s.date <= self.date and s.date > start) + for hostname, h in self.iteritems(): + i = n = 0 + for s in sessions: + if hostname in s: + i += s[hostname].failed + n += 1 + h.failure_rate_running = float(100 * i) / n + self.calculated_failure_history = True + def plotter(f, hostnames, field, logscale = False): plotlines = sorted(session.get_plot_row(field, hostnames) for session in sessions) title = Host.format_dict[field].title @@ -177,7 +190,9 @@ def plotter(f, hostnames, field, logscale = False): set xdata time set timefmt '%Y-%m-%dT%H:%M:%SZ' #set format x '%m/%d' - set format x '%b%d' + #set format x '%b%d' + #set format x '%Y-%m-%d' + set format x '%Y-%m' #set title '""" + title + """' set ylabel '""" + ylabel + """' plot""" + ",".join(" '-' using 1:2 with linespoints pointinterval 500 title '%s'" % h for h in hostnames) + "\n") @@ -186,7 +201,7 @@ def plotter(f, hostnames, field, logscale = False): f.write("%s %s\n" % (plotline[0], plotline[i].rstrip("%"))) f.write("e\n") -def plot_many(hostnames, fields): +def plot_hosts(hostnames, fields): for field in fields: for logscale in (False, True): gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE) @@ -196,17 +211,6 @@ def plot_many(hostnames, fields): gnuplot.stdin.close() gnuplot.wait() -def plot_one(hostnames, fields): - gnuplot = subprocess.Popen(("gnuplot",), stdin = subprocess.PIPE) - gnuplot.stdin.write("set terminal pdf\n") - gnuplot.stdin.write("set output 'analyze-rcynic-history.pdf'\n") - for field in fields: - if field != "hostname": - plotter(gnuplot.stdin, hostnames, field, logscale = False) - plotter(gnuplot.stdin, hostnames, field, logscale = True) - gnuplot.stdin.close() - gnuplot.wait() - mb = mailbox.Maildir("/u/sra/rpki/rcynic-xml", factory = None, create = False) if sys.platform == "darwin": # Sigh @@ -219,7 +223,7 @@ sessions = [] latest = None for i, key in enumerate(mb.iterkeys(), 1): - sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) + sys.stderr.write("\r%s Reading %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) if key in shelf: session = shelf[key] @@ -246,6 +250,13 @@ sys.stderr.write("\n") shelf.sync() +for i, session in enumerate(sessions, 1): + sys.stderr.write("\r%s Failure history %d/%d...%s..." % ("|\\-/"[i & 3], i, len(sessions), session.session_id)) + if not getattr(session, "calculated_failure_history", False): + session.calculate_failure_history(sessions) + shelf[session.msg_key] = session +sys.stderr.write("\n") + if plot_all_hosts: hostnames = sorted(reduce(lambda x, y: x | y, (s.hostnames for s in sessions), @@ -256,13 +267,9 @@ else: "rpki.afrinic.net", "rpki-pilot.arin.net", "arin.rpki.net", "rgnet.rpki.net") -fields = [fmt.attr for fmt in Host.format if fmt.attr != "hostname"] -if plot_to_one: - plot_one(hostnames, fields) -if plot_to_many: - plot_many(hostnames, fields) +plot_hosts(hostnames, [fmt.attr for fmt in Host.format if fmt.attr != "hostname"]) -if write_rcynic_xml and latest is not None: +if latest is not None: f = open("rcynic.xml", "wb") f.write(mb[latest.msg_key].get_payload()) f.close() -- cgit v1.2.3