diff options
author | Rob Austein <sra@hactrn.net> | 2010-10-26 17:23:27 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2010-10-26 17:23:27 +0000 |
commit | e23dab52b6b7eef868bfbee2842aa6fc924050cc (patch) | |
tree | 35b143375cbb4bc5f0f9e4b8518f488b2490e575 | |
parent | a490df50b7a2d230a38be9823280c308631c4414 (diff) |
AWK appears to run an order of magnitude faster than Python for this task, sigh.
svn path=/scripts/ripe-to-csv.awk; revision=3493
-rw-r--r-- | scripts/ripe-to-csv.awk | 85 | ||||
-rw-r--r-- | scripts/ripe-to-csv.py | 14 |
2 files changed, 92 insertions, 7 deletions
```diff
diff --git a/scripts/ripe-to-csv.awk b/scripts/ripe-to-csv.awk
new file mode 100644
index 00000000..bc0ab8e5
--- /dev/null
+++ b/scripts/ripe-to-csv.awk
@@ -0,0 +1,85 @@
+#!/usr/bin/awk -f
+
+# Parse a WHOIS research dump and write out (just) the RPKI-relevant
+# fields in myrpki-format CSV syntax.
+#
+# Unfortunately, unlike the ARIN and APNIC databases, the RIPE database
+# doesn't really have any useful concept of an organizational handle.
+# More precisely, while it has handles out the wazoo, none of them are
+# useful as a reliable grouping mechanism for tracking which set of
+# resources are held by a particular organization. So, instead of being
+# able to track all of an organization's resources with a single handle
+# as we can in the ARIN and APNIC databases, the best we can do with the
+# RIPE database is to track individual resources, each with its own
+# resource handle. Well, for prefixes -- ASN entries behave more like
+# in the ARIN and APNIC databases.
+#
+# Feh.
+#
+# NB: The input data for this script is publicly available via FTP, but
+# you'll have to fetch the data from RIPE yourself, and be sure to see
+# the terms and conditions referenced by the data file header comments.
+#
+# $Id$
+#
+# Copyright (C) 2009-2010 Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+BEGIN {
+    FS = "[ \t]*:";
+    OFS = "\t";
+}
+
+{
+    sub(/#.*$/, "");
+    sub(/[ \t]+$/, "");
+}
+
+NF && !tag {
+    tag = $1;
+}
+
+/^(as-name|aut-num|inet6num|inetnum|mnt-by|netname|status):/ {
+    t = $1;
+    sub(/^[^ \t]+:/, "");
+    gsub(/[ \t]/, "");
+    tags[t] = $0;
+}
+
+!NF && tag {
+    got_one();
+}
+
+END {
+    got_one();
+}
+
+function got_one() {
+    if (tag == "inetnum" || tag == "inet6num")
+        got_inetnum();
+    else if (tag == "aut-num")
+        got_aut_num();
+    delete tags;
+    tag = "";
+}
+
+function got_aut_num() {
+    sub(/^AS/, "", tags["aut-num"]);
+    print tags["mnt-by"], tags["aut-num"] >"asns.csv";
+}
+
+function got_inetnum() {
+    if (tags["status"] ~ /^ASSIGNED(P[AI])?$/)
+        print tags["netname"], tags[tag] >"prefixes.csv";
+}
diff --git a/scripts/ripe-to-csv.py b/scripts/ripe-to-csv.py
index 59bfdc25..e04473d9 100644
--- a/scripts/ripe-to-csv.py
+++ b/scripts/ripe-to-csv.py
@@ -21,7 +21,7 @@ the terms and conditions referenced by the data file header comments.
 
 $Id$
 
-Copyright (C) 2009 Internet Systems Consortium ("ISC")
+Copyright (C) 2009-2010 Internet Systems Consortium ("ISC")
 
 Permission to use, copy, modify, and distribute this software for any
 purpose with or without fee is hereby granted, provided that the above
@@ -70,7 +70,7 @@ class Handle(dict):
         self.check()
 
 class aut_num(Handle):
-    want_tags = ("aut-num", "mnt-by", "as-name")
+    want_tags = ("aut-num", "mnt-by") # "as-name"
 
     def set(self, tag, val):
         if tag == "aut-num" and val.startswith("AS"):
@@ -79,21 +79,21 @@ class aut_num(Handle):
 
     def finish(self, ctx):
         if self.check():
-            ctx.asns.writerow((self["as-name"], self["mnt-by"], self["aut-num"]))
+            ctx.asns.writerow((self["mnt-by"], self["aut-num"]))
 
 class inetnum(Handle):
-    want_tags = ("inetnum", "mnt-by", "netname", "status")
+    want_tags = ("inetnum", "netname", "status") # "mnt-by"
 
     def finish(self, ctx):
         if self.check() and self["status"] in self.want_status:
-            ctx.prefixes.writerow((self["netname"], self["mnt-by"], self["inetnum"]))
+            ctx.prefixes.writerow((self["netname"], self["inetnum"]))
 
 class inet6num(Handle):
-    want_tags = ("inet6num", "mnt-by", "netname", "status")
+    want_tags = ("inet6num", "netname", "status") # "mnt-by"
 
     def finish(self, ctx):
         if self.check() and self["status"] in self.want_status:
-            ctx.prefixes.writerow((self["netname"], self["mnt-by"], self["inet6num"]))
+            ctx.prefixes.writerow((self["netname"], self["inet6num"]))
 
 class main(object):
 
```