diff options
author | Rob Austein <sra@hactrn.net> | 2010-10-26 22:56:34 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2010-10-26 22:56:34 +0000 |
commit | 1672b574f9b0b3cb2ee8c3b8a947ae2f411c4904 (patch) | |
tree | 8128c068b0211cff94f7ba0208e046360f806875 | |
parent | 7c2f0aa4ff0b101484458e0c1296df6b694bac1b (diff) |
RPSL line continuation is not my friend
svn path=/scripts/ripe-to-csv.awk; revision=3496
-rw-r--r-- | scripts/ripe-to-csv.awk | 65 |
1 files changed, 39 insertions, 26 deletions
diff --git a/scripts/ripe-to-csv.awk b/scripts/ripe-to-csv.awk index bebcb64a..a7073c38 100644 --- a/scripts/ripe-to-csv.awk +++ b/scripts/ripe-to-csv.awk @@ -18,13 +18,6 @@ # fairly simple stream parser that has to process a ridiculous amount # of text. AWK turns out to be significantly faster for this. # -# There are a few known screw cases in RPSL format that this script -# doesn't attempt to handle, so if you just can't resist using -# newlines between the begin and end addresses of an IPv4 address -# range, this script will not understand your WHOIS entry. So don't. -# -# Feh. -# # NB: The input data for this script is publicly available via FTP, but # you'll have to fetch the data from RIPE yourself, and be sure to see # the terms and conditions referenced by the data file header comments. @@ -52,52 +45,72 @@ BEGIN { OFS = "\t"; } -# Clean up comments and trailing whitespace; skip lines that are empty -# after cleanup. If we were attempting to handle line continuation, -# this is where we'd start. -!/^$/ { - sub(/#.*$/, ""); +# Clean up trailing whitespace. +{ sub(/[ \t]+$/, ""); - if (!NF) - next; } -# Non-empty line and we have no tag, must be start of a new block. +# Continuation line: strip comment, if any, then append value, if any, +# to what we had from previous line(s). +/^[^A-Z]/ { + sub(/[ \t]*#.*$/, ""); + if (NF) + val = val $0; + next; +} + +# Anything other than line continuation terminates the previous line, +# so if we were working on a line, we're done with it now, process it. +key { + do_line(); +} + +# Non-empty line and we have no tag, this must be start of a new block. NF && !tag { tag = $1; } # One of the tags we care about, clean up and save the data. /^(AS-NAME|AUT-NUM|INET6NUM|INETNUM|MNT-BY|NETNAME|STATUS):/ { - t = $1; + key = $1; sub(/^[^ \t]+:/, ""); - gsub(/[ \t]/, ""); - tags[t] = $0; + sub(/[ \t]*#.*$/, ""); + val = $0; } # Blank line and we have something, process it. 
!NF && tag { - got_one(); + do_block(); } -# End of file, process last entry, if any. +# End of file, process final data, if any. END { - got_one(); + do_line(); + do_block(); +} + +# Handle one line, after icky RPSL line continuation. +function do_line() { + gsub(/[ \t]/, "", val); + if (key && val) + tags[key] = val; + key = ""; + val = ""; } # Dispatch to handle known block types, then clean up so we can start # a new block. -function got_one() { +function do_block() { if (tag == "INETNUM" || tag == "INET6NUM") - got_inetnum(); + do_prefix(); else if (tag == "AUT-NUM") - got_aut_num(); + do_asn(); delete tags; tag = ""; } # Handle an AUT-NUM block: extract the ASN, use MNT-BY as the handle. -function got_aut_num() { +function do_asn() { sub(/^AS/, "", tags[tag]); if (tags["MNT-BY"] && tags[tag]) print tags["MNT-BY"], tags[tag] >"asns.csv"; @@ -105,7 +118,7 @@ function got_aut_num() { # Handle an INETNUM or INET6NUM block: check for the status values we # care about, use NETNAME as the handle. -function got_inetnum() { +function do_prefix() { if (tags["STATUS"] ~ /^ASSIGNED(P[AI])?$/ && tags["NETNAME"] && tags[tag]) print tags["NETNAME"], tags[tag] >"prefixes.csv"; } |