aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rob Austein <sra@hactrn.net> 2010-10-26 17:23:27 +0000
committer Rob Austein <sra@hactrn.net> 2010-10-26 17:23:27 +0000
commit e23dab52b6b7eef868bfbee2842aa6fc924050cc (patch)
tree 35b143375cbb4bc5f0f9e4b8518f488b2490e575
parent a490df50b7a2d230a38be9823280c308631c4414 (diff)
AWK appears to run an order of magnitude faster than Python for this task, sigh.
svn path=/scripts/ripe-to-csv.awk; revision=3493
-rw-r--r-- scripts/ripe-to-csv.awk 85
-rw-r--r-- scripts/ripe-to-csv.py 14
2 files changed, 92 insertions, 7 deletions
diff --git a/scripts/ripe-to-csv.awk b/scripts/ripe-to-csv.awk
new file mode 100644
index 00000000..bc0ab8e5
--- /dev/null
+++ b/scripts/ripe-to-csv.awk
@@ -0,0 +1,85 @@
+#!/usr/bin/awk -f
+
+# Parse a WHOIS research dump and write out (just) the RPKI-relevant
+# fields in myrpki-format CSV syntax.
+#
+# Unfortunately, unlike the ARIN and APNIC databases, the RIPE database
+# doesn't really have any useful concept of an organizational handle.
+# More precisely, while it has handles out the wazoo, none of them are
+# useful as a reliable grouping mechanism for tracking which set of
+# resources are held by a particular organization. So, instead of being
+# able to track all of an organization's resources with a single handle
+# as we can in the ARIN and APNIC databases, the best we can do with the
+# RIPE database is to track individual resources, each with its own
+# resource handle. Well, for prefixes -- ASN entries behave more like
+# in the ARIN and APNIC databases.
+#
+# Feh.
+#
+# NB: The input data for this script is publicly available via FTP, but
+# you'll have to fetch the data from RIPE yourself, and be sure to see
+# the terms and conditions referenced by the data file header comments.
+#
+# $Id$
+#
+# Copyright (C) 2009-2010 Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+BEGIN { # Runs once before any input: configure field splitting and output joining.
+ FS = "[ \t]*:"; # fields are separated by a colon plus any blanks/tabs right before it
+ OFS = "\t"; # print joins its comma-separated arguments with a tab
+}
+
+{ # Applied to every input line: normalise it before the later rules test it.
+ sub(/#.*$/, ""); # drop everything from the first "#" to the end of the line
+ sub(/[ \t]+$/, ""); # then drop trailing blanks/tabs, so a comment-only line ends up empty
+}
+
+NF && !tag { # First non-empty line seen while no record is open: start a new record.
+ tag = $1; # text before the first separator (the whole line if it contains no colon)
+}
+
+/^(as-name|aut-num|inet6num|inetnum|mnt-by|netname|status):/ { # Line opens with one of the attributes we keep: store its value.
+ t = $1; # attribute name, saved before $0 is edited (editing $0 re-splits the fields)
+ sub(/^[^ \t]+:/, ""); # strip the leading "name:" (NOTE(review): greedy -- assumes a blank follows the tag's colon; confirm)
+ gsub(/[ \t]/, ""); # remove every blank and tab from what is left
+ tags[t] = $0; # a repeated attribute within one record overwrites the earlier value
+}
+
+!NF && tag { # Empty line while a record is open: the record is complete.
+ got_one(); # emit it if it is a wanted type, then reset the per-record state
+}
+
+END { # End of input: handle a final record that was not followed by an empty line.
+ got_one(); # harmless when no record is open: tag is empty then, so nothing is printed
+}
+
+function got_one() { # Finish the current record: dispatch on its first tag, then clear per-record state.
+ if (tag == "inetnum" || tag == "inet6num")
+ got_inetnum(); # IPv4 and IPv6 records share one handler
+ else if (tag == "aut-num")
+ got_aut_num();
+ delete tags; # records with any other first tag reach here without producing output
+ tag = ""; # an empty tag means no record is open
+}
+
+function got_aut_num() { # Write one line for an aut-num record to asns.csv.
+ sub(/^AS/, "", tags["aut-num"]); # strip a leading "AS" so only the remainder of the value is written
+ print tags["mnt-by"], tags["aut-num"] >"asns.csv"; # columns: mnt-by, AS value, joined by OFS; an attribute that was never seen prints as an empty field
+}
+
+function got_inetnum() { # Write one line for an inetnum/inet6num record to prefixes.csv, if its status qualifies.
+ if (tags["status"] ~ /^ASSIGNED(P[AI])?$/) # accepts ASSIGNED, ASSIGNEDPA, ASSIGNEDPI (blanks were already removed from the stored value)
+ print tags["netname"], tags[tag] >"prefixes.csv"; # tag is "inetnum" or "inet6num" here, so tags[tag] is the record's own address value
+}
diff --git a/scripts/ripe-to-csv.py b/scripts/ripe-to-csv.py
index 59bfdc25..e04473d9 100644
--- a/scripts/ripe-to-csv.py
+++ b/scripts/ripe-to-csv.py
@@ -21,7 +21,7 @@ the terms and conditions referenced by the data file header comments.
$Id$
-Copyright (C) 2009 Internet Systems Consortium ("ISC")
+Copyright (C) 2009-2010 Internet Systems Consortium ("ISC")
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -70,7 +70,7 @@ class Handle(dict):
self.check()
class aut_num(Handle):
- want_tags = ("aut-num", "mnt-by", "as-name")
+ want_tags = ("aut-num", "mnt-by") # "as-name"
def set(self, tag, val):
if tag == "aut-num" and val.startswith("AS"):
@@ -79,21 +79,21 @@ class aut_num(Handle):
def finish(self, ctx):
if self.check():
- ctx.asns.writerow((self["as-name"], self["mnt-by"], self["aut-num"]))
+ ctx.asns.writerow((self["mnt-by"], self["aut-num"]))
class inetnum(Handle):
- want_tags = ("inetnum", "mnt-by", "netname", "status")
+ want_tags = ("inetnum", "netname", "status") # "mnt-by"
def finish(self, ctx):
if self.check() and self["status"] in self.want_status:
- ctx.prefixes.writerow((self["netname"], self["mnt-by"], self["inetnum"]))
+ ctx.prefixes.writerow((self["netname"], self["inetnum"]))
class inet6num(Handle):
- want_tags = ("inet6num", "mnt-by", "netname", "status")
+ want_tags = ("inet6num", "netname", "status") # "mnt-by"
def finish(self, ctx):
if self.check() and self["status"] in self.want_status:
- ctx.prefixes.writerow((self["netname"], self["mnt-by"], self["inet6num"]))
+ ctx.prefixes.writerow((self["netname"], self["inet6num"]))
class main(object):