diff options
Diffstat (limited to 'potpourri/ripe-to-csv.awk')
-rw-r--r-- | potpourri/ripe-to-csv.awk | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/potpourri/ripe-to-csv.awk b/potpourri/ripe-to-csv.awk new file mode 100644 index 00000000..5325574f --- /dev/null +++ b/potpourri/ripe-to-csv.awk @@ -0,0 +1,124 @@ +#!/usr/bin/awk -f + +# Parse a WHOIS research dump and write out (just) the RPKI-relevant +# fields in myrpki-format CSV syntax. +# +# Unfortunately, unlike the ARIN and APNIC databases, the RIPE database +# doesn't really have any useful concept of an organizational handle. +# More precisely, while it has handles out the wazoo, none of them are +# useful as a reliable grouping mechanism for tracking which set of +# resources are held by a particular organization. So, instead of being +# able to track all of an organization's resources with a single handle +# as we can in the ARIN and APNIC databases, the best we can do with the +# RIPE database is to track individual resources, each with its own +# resource handle. Well, for prefixes -- ASN entries behave more like +# in the ARIN and APNIC databases. +# +# This is an AWK script rather than a Python script because it is a +# fairly simple stream parser that has to process a ridiculous amount +# of text. AWK turns out to be significantly faster for this. +# +# NB: The input data for this script is publicly available via FTP, but +# you'll have to fetch the data from RIPE yourself, and be sure to see +# the terms and conditions referenced by the data file header comments. +# +# $Id$ +# +# Copyright (C) 2009-2010 Internet Systems Consortium ("ISC") +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. + +# On input, ":" is the most useful delimiter +# On output, we want tab-delimited text. +BEGIN { + FS = "[ \t]*:"; + OFS = "\t"; +} + +# Clean up trailing whitespace. +{ + sub(/[ \t]+$/, ""); +} + +# Continuation line: strip comment, if any, then append value, if any, +# to what we had from previous line(s). +/^[^A-Z]/ { + sub(/[ \t]*#.*$/, ""); + if (NF) + val = val $0; + next; +} + +# Anything other than line continuation terminates the previous line, +# so if we were working on a line, we're done with it now, process it. +key { + do_line(); +} + +# Non-empty line and we have no tag, this must be start of a new block. +NF && !tag { + tag = $1; +} + +# One of the tags we care about, clean up and save the data. +/^(AS-NAME|AUT-NUM|INET6NUM|INETNUM|MNT-BY|NETNAME|STATUS):/ { + key = $1; + sub(/^[^ \t]+:/, ""); + sub(/[ \t]*#.*$/, ""); + val = $0; +} + +# Blank line and we have something, process it. +!NF && tag { + do_block(); +} + +# End of file, process final data, if any. +END { + do_line(); + do_block(); +} + +# Handle one line, after line icky RPSL continuation. +function do_line() { + gsub(/[ \t]/, "", val); + if (key && val) + tags[key] = val; + key = ""; + val = ""; +} + +# Dispatch to handle known block types, then clean up so we can start +# a new block. +function do_block() { + if (tag == "INETNUM" || tag == "INET6NUM") + do_prefix(); + else if (tag == "AUT-NUM") + do_asn(); + delete tags; + tag = ""; +} + +# Handle an AUT-NUM block: extract the ASN, use MNT-BY as the handle. +function do_asn() { + sub(/^AS/, "", tags[tag]); + if (tags["MNT-BY"] && tags[tag]) + print tags["MNT-BY"], tags[tag] >"asns.csv"; +} + +# Handle an INETNUM or INET6NUM block: check for the status values we +# care about, use NETNAME as the handle. +function do_prefix() { + if (tags["STATUS"] ~ /^ASSIGNED(P[AI])$/ && tags["NETNAME"] && tags[tag]) + print tags["NETNAME"], tags[tag] >"prefixes.csv"; +} |