diff options
author | Rob Austein <sra@hactrn.net> | 2014-04-05 22:42:12 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2014-04-05 22:42:12 +0000 |
commit | fe0bf509f528dbdc50c7182f81057c6a4e15e4bd (patch) | |
tree | 07c9a923d4a0ccdfea11c49cd284f6d5757c5eda /potpourri/ripe-to-csv.py | |
parent | aa28ef54c271fbe4d52860ff8cf13cab19e2207c (diff) |
Source tree reorg, phase 1. Almost everything moved, no file contents changed.
svn path=/branches/tk685/; revision=5757
Diffstat (limited to 'potpourri/ripe-to-csv.py')
-rw-r--r-- | potpourri/ripe-to-csv.py | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/potpourri/ripe-to-csv.py b/potpourri/ripe-to-csv.py new file mode 100644 index 00000000..b864345b --- /dev/null +++ b/potpourri/ripe-to-csv.py @@ -0,0 +1,138 @@ +# $Id$ +# +# Copyright (C) 2009-2012 Internet Systems Consortium ("ISC") +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. + +""" +Parse a WHOIS research dump and write out (just) the RPKI-relevant +fields in myrpki-format CSV syntax. + +Unfortunately, unlike the ARIN and APNIC databases, the RIPE database +doesn't really have any useful concept of an organizational handle. +More precisely, while it has handles out the wazoo, none of them are +useful as a reliable grouping mechanism for tracking which set of +resources are held by a particular organization. So, instead of being +able to track all of an organization's resources with a single handle +as we can in the ARIN and APNIC databases, the best we can do with the +RIPE database is to track individual resources, each with its own +resource handle. Well, for prefixes -- ASN entries behave more like +in the ARIN and APNIC databases. + +Feh. + +NB: The input data for this script is publicly available via FTP, but +you'll have to fetch the data from RIPE yourself, and be sure to see +the terms and conditions referenced by the data file header comments. +""" + +import gzip +from rpki.csv_utils import csv_writer + +class Handle(dict): + + want_tags = () + + want_status = ("ASSIGNED", "ASSIGNEDPA", "ASSIGNEDPI") + + debug = False + + def set(self, tag, val): + if tag in self.want_tags: + self[tag] = "".join(val.split(" ")) + + def check(self): + for tag in self.want_tags: + if not tag in self: + return False + if self.debug: + self.log() + return True + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, + " ".join("%s:%s" % (tag, self.get(tag, "?")) + for tag in self.want_tags)) + + def log(self): + print repr(self) + + def finish(self, ctx): + self.check() + +class aut_num(Handle): + want_tags = ("aut-num", "mnt-by") # "as-name" + + def set(self, tag, val): + if tag == "aut-num" and val.startswith("AS"): + val = val[2:] + Handle.set(self, tag, val) + + def finish(self, ctx): + if self.check(): + ctx.asns.writerow((self["mnt-by"], self["aut-num"])) + +class inetnum(Handle): + want_tags = ("inetnum", "netname", "status") # "mnt-by" + + def finish(self, ctx): + if self.check() and self["status"] in self.want_status: + ctx.prefixes.writerow((self["netname"], self["inetnum"])) + +class inet6num(Handle): + want_tags = ("inet6num", "netname", "status") # "mnt-by" + + def finish(self, ctx): + if self.check() and self["status"] in self.want_status: + ctx.prefixes.writerow((self["netname"], self["inet6num"])) + +class main(object): + + types = dict((x.want_tags[0], x) for x in (aut_num, inetnum, inet6num)) + + def finish_statement(self, done): + if self.statement: + tag, sep, val = self.statement.partition(":") + assert sep, "Couldn't find separator in %r" % self.statement + tag = tag.strip().lower() + val = val.strip().upper() + if self.cur is None: + self.cur = self.types[tag]() if tag in self.types else False + if self.cur is not False: + self.cur.set(tag, val) + if done and self.cur: + self.cur.finish(self) + self.cur = None + + filenames = ("ripe.db.aut-num.gz", "ripe.db.inet6num.gz", "ripe.db.inetnum.gz") + + def __init__(self): + self.asns = csv_writer("asns.csv") + self.prefixes = csv_writer("prefixes.csv") + for fn in self.filenames: + f = gzip.open(fn) + self.statement = "" + self.cur = None + for line in f: + line = line.expandtabs().partition("#")[0].rstrip("\n") + if line and not line[0].isalpha(): + self.statement += line[1:] if line[0] == "+" else line + else: + self.finish_statement(not line) + self.statement = line + self.finish_statement(True) + f.close() + self.asns.close() + self.prefixes.close() + +main() |