Source tree reorg, phase 1. Almost everything moved, no file contents changed.

svn path=/branches/tk685/; revision=5757
author: Rob Austein <sra@hactrn.net> 2014-04-05 22:42:12 +0000
committer: Rob Austein <sra@hactrn.net> 2014-04-05 22:42:12 +0000
commit: fe0bf509f528dbdc50c7182f81057c6a4e15e4bd (patch)
tree: 07c9a923d4a0ccdfea11c49cd284f6d5757c5eda /potpourri/ripe-to-csv.awk
parent: aa28ef54c271fbe4d52860ff8cf13cab19e2207c (diff)
1 files changed, 124 insertions, 0 deletions
diff --git a/potpourri/ripe-to-csv.awk b/potpourri/ripe-to-csv.awk
new file mode 100644
index 00000000..5325574f
--- /dev/null
+++ b/potpourri/ripe-to-csv.awk
@@ -0,0 +1,124 @@
+#!/usr/bin/awk -f
+
+# Parse a WHOIS research dump and write out (just) the RPKI-relevant
+# fields in myrpki-format CSV syntax.
+#
+# Unfortunately, unlike the ARIN and APNIC databases, the RIPE database
+# doesn't really have any useful concept of an organizational handle.
+# More precisely, while it has handles out the wazoo, none of them are
+# useful as a reliable grouping mechanism for tracking which set of
+# resources are held by a particular organization.  So, instead of being
+# able to track all of an organization's resources with a single handle
+# as we can in the ARIN and APNIC databases, the best we can do with the
+# RIPE database is to track individual resources, each with its own
+# resource handle.  Well, for prefixes -- ASN entries behave more like
+# in the ARIN and APNIC databases.
+#
+# This is an AWK script rather than a Python script because it is a
+# fairly simple stream parser that has to process a ridiculous amount
+# of text.  AWK turns out to be significantly faster for this.
+#
+# NB: The input data for this script is publicly available via FTP, but
+# you'll have to fetch the data from RIPE yourself, and be sure to see
+# the terms and conditions referenced by the data file header comments.
+# 
+# $Id$
+#
+# Copyright (C) 2009-2010  Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# On input, ":" is the most useful delimiter
+# On output, we want tab-delimited text.
+BEGIN {
+    FS = "[ \t]*:";
+    OFS = "\t";
+}
+
+# Clean up trailing whitespace.
+{
+    sub(/[ \t]+$/, "");
+}
+
+# Continuation line: strip comment, if any, then append value, if any,
+# to what we had from previous line(s).
+/^[^A-Z]/ {
+    sub(/[ \t]*#.*$/, "");
+    if (NF)
+	val = val $0;
+    next;
+}
+
+# Anything other than line continuation terminates the previous line,
+# so if we were working on a line, we're done with it now, process it.
+key {
+    do_line();
+}
+
+# Non-empty line and we have no tag, this must be start of a new block.
+NF && !tag {
+    tag = $1;
+}
+
+# One of the tags we care about, clean up and save the data.
+/^(AS-NAME|AUT-NUM|INET6NUM|INETNUM|MNT-BY|NETNAME|STATUS):/ {
+    key = $1;
+    sub(/^[^ \t]+:/, "");
+    sub(/[ \t]*#.*$/, "");
+    val = $0;
+}
+
+# Blank line and we have something, process it.
+!NF && tag {
+    do_block();
+}
+
+# End of file, process final data, if any.
+END {
+    do_line();
+    do_block();
+}
+
+# Handle one line, after line icky RPSL continuation.
+function do_line() {
+    gsub(/[ \t]/, "", val);
+    if (key && val)
+	tags[key] = val;
+    key = "";
+    val = "";
+}
+
+# Dispatch to handle known block types, then clean up so we can start
+# a new block.
+function do_block() {
+    if (tag == "INETNUM" || tag == "INET6NUM")
+	do_prefix();
+    else if (tag == "AUT-NUM")
+	do_asn();
+    delete tags;
+    tag = "";
+}
+
+# Handle an AUT-NUM block: extract the ASN, use MNT-BY as the handle.
+function do_asn() {
+    sub(/^AS/, "", tags[tag]);
+    if (tags["MNT-BY"] && tags[tag])
+	print tags["MNT-BY"], tags[tag] >"asns.csv";
+}
+
+# Handle an INETNUM or INET6NUM block: check for the status values we
+# care about, use NETNAME as the handle.
+function do_prefix() {
+    if (tags["STATUS"] ~ /^ASSIGNED(P[AI])$/ && tags["NETNAME"] && tags[tag])
+	print tags["NETNAME"], tags[tag] >"prefixes.csv";
+}
author	Rob Austein <sra@hactrn.net>	2014-04-05 22:42:12 +0000
committer	Rob Austein <sra@hactrn.net>	2014-04-05 22:42:12 +0000
commit	fe0bf509f528dbdc50c7182f81057c6a4e15e4bd (patch)
tree	07c9a923d4a0ccdfea11c49cd284f6d5757c5eda /potpourri/ripe-to-csv.awk
parent	aa28ef54c271fbe4d52860ff8cf13cab19e2207c (diff)