From 0bf7f83eadf95a5297468e68983140c606371dc0 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Wed, 2 May 2012 21:18:08 +0000 Subject: First cut at hack to dump all relevant IANA data to CSV. Output still needs to be merged with ERX data from ARIN's database, as IANA only deals with /8s in IPv4 space. Haven't figured out yet whether that should be a separate script or not. See #33. svn path=/branches/tk33/; revision=4460 --- scripts/iana-to-csv.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 scripts/iana-to-csv.py (limited to 'scripts/iana-to-csv.py') diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py new file mode 100644 index 00000000..dd524d8c --- /dev/null +++ b/scripts/iana-to-csv.py @@ -0,0 +1,64 @@ +""" +Parse IANA XML data. + +$Id$ + +Copyright (C) 2010-2012 Internet Systems Consortium ("ISC") + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. 
+""" + +import sys +import lxml.etree +from rpki.csv_utils import csv_writer + +def iterate_xml(filename, tag): + return lxml.etree.parse(filename).getroot().getiterator(tag) + +def ns(tag): + return "{http://www.iana.org/assignments}" + tag + +tag_description = ns("description") +tag_designation = ns("designation") +tag_record = ns("record") +tag_number = ns("number") +tag_prefix = ns("prefix") + +handles = {} + +for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"): + handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = rir.split()[0].upper() + +asns = csv_writer("asns.csv") +prefixes = csv_writer("prefixes.csv") + +for record in iterate_xml("as-numbers.xml", tag_record): + description = record.findtext(tag_description) + if description in handles: + asns.writerow((handles[description], record.findtext(tag_number))) + +for record in iterate_xml("ipv4-address-space.xml", tag_record): + designation = record.findtext(tag_designation) + if designation in handles: + prefix = record.findtext(tag_prefix) + p, l = prefix.split("/") + assert l == "8", "Violated /8 assumption: %r" % prefix + prefixes.writerow((handles[designation], "%d.0.0.0/8" % int(p))) + +for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record): + description = record.findtext(tag_description) + if record.findtext(tag_description) in handles: + prefixes.writerow((handles[description], record.findtext(tag_prefix))) + +asns.close() +prefixes.close() -- cgit v1.2.3 From 344c3f6a9fc40cff4d6c3ae31d35158aacb214f8 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 3 May 2012 03:30:56 +0000 Subject: Merge in ERX data. See #33. 
svn path=/branches/tk33/; revision=4462 --- scripts/iana-to-csv.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'scripts/iana-to-csv.py') diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py index dd524d8c..2bfe2bf8 100644 --- a/scripts/iana-to-csv.py +++ b/scripts/iana-to-csv.py @@ -20,7 +20,8 @@ PERFORMANCE OF THIS SOFTWARE. import sys import lxml.etree -from rpki.csv_utils import csv_writer +from rpki.csv_utils import csv_reader, csv_writer +from rpki.resource_set import resource_bag def iterate_xml(filename, tag): return lxml.etree.parse(filename).getroot().getiterator(tag) @@ -35,9 +36,12 @@ tag_number = ns("number") tag_prefix = ns("prefix") handles = {} +rirs = {} for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"): - handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = rir.split()[0].upper() + handle = rir.split()[0].upper() + handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = handle + rirs[handle] = resource_bag() asns = csv_writer("asns.csv") prefixes = csv_writer("prefixes.csv") @@ -53,12 +57,27 @@ for record in iterate_xml("ipv4-address-space.xml", tag_record): prefix = record.findtext(tag_prefix) p, l = prefix.split("/") assert l == "8", "Violated /8 assumption: %r" % prefix - prefixes.writerow((handles[designation], "%d.0.0.0/8" % int(p))) + rirs[handles[designation]] |= resource_bag.from_str("%d.0.0.0/8" % int(p)) for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record): description = record.findtext(tag_description) if record.findtext(tag_description) in handles: - prefixes.writerow((handles[description], record.findtext(tag_prefix))) + rirs[handles[description]] |= resource_bag.from_str(record.findtext(tag_prefix)) + +erx = list(csv_reader("erx.csv")) +assert all(r in rirs for r, p in erx) + +erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True) + +for rir in 
rirs: + rirs[rir] -= erx_overrides + rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True) + +for rir, bag in rirs.iteritems(): + for p in bag.v4: + prefixes.writerow((rir, p)) + for p in bag.v6: + prefixes.writerow((rir, p)) asns.close() prefixes.close() -- cgit v1.2.3 From 84c82bea2b5c77202e5712511e9476a54c83e67a Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 3 May 2012 17:39:35 +0000 Subject: Handle legacy via sixth pseudo-RIR named "LEGACY," at least for now. svn path=/branches/tk33/; revision=4463 --- scripts/iana-to-csv.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'scripts/iana-to-csv.py') diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py index 2bfe2bf8..a9aee0b9 100644 --- a/scripts/iana-to-csv.py +++ b/scripts/iana-to-csv.py @@ -34,9 +34,10 @@ tag_designation = ns("designation") tag_record = ns("record") tag_number = ns("number") tag_prefix = ns("prefix") +tag_status = ns("status") handles = {} -rirs = {} +rirs = { "LEGACY" : resource_bag() } for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"): handle = rir.split()[0].upper() @@ -53,12 +54,12 @@ for record in iterate_xml("as-numbers.xml", tag_record): for record in iterate_xml("ipv4-address-space.xml", tag_record): designation = record.findtext(tag_designation) - if designation in handles: - prefix = record.findtext(tag_prefix) - p, l = prefix.split("/") - assert l == "8", "Violated /8 assumption: %r" % prefix - rirs[handles[designation]] |= resource_bag.from_str("%d.0.0.0/8" % int(p)) - + if record.findtext(tag_status) != "RESERVED": + prefix, prefixlen = [int(i) for i in record.findtext(tag_prefix).split("/")] + if prefixlen != 8: + raise ValueError("%s violated /8 assumption" % record.findtext(tag_prefix)) + rirs[handles.get(designation, "LEGACY")] |= resource_bag.from_str("%d.0.0.0/8" % prefix) + for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record): description = 
record.findtext(tag_description) if record.findtext(tag_description) in handles: @@ -70,8 +71,9 @@ assert all(r in rirs for r, p in erx) erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True) for rir in rirs: - rirs[rir] -= erx_overrides - rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True) + if rir != "LEGACY": + rirs[rir] -= erx_overrides + rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True) for rir, bag in rirs.iteritems(): for p in bag.v4: -- cgit v1.2.3 From c151cd5c3405b73279809b9107651d5bfbc21338 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sat, 5 May 2012 06:08:16 +0000 Subject: Use lowercase handle names for RIRs, for consistency with older code. svn path=/branches/tk33/; revision=4469 --- scripts/iana-to-csv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'scripts/iana-to-csv.py') diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py index a9aee0b9..ee8c53d1 100644 --- a/scripts/iana-to-csv.py +++ b/scripts/iana-to-csv.py @@ -37,10 +37,10 @@ tag_prefix = ns("prefix") tag_status = ns("status") handles = {} -rirs = { "LEGACY" : resource_bag() } +rirs = { "legacy" : resource_bag() } for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"): - handle = rir.split()[0].upper() + handle = rir.split()[0].lower() handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = handle rirs[handle] = resource_bag() @@ -58,7 +58,7 @@ for record in iterate_xml("ipv4-address-space.xml", tag_record): prefix, prefixlen = [int(i) for i in record.findtext(tag_prefix).split("/")] if prefixlen != 8: raise ValueError("%s violated /8 assumption" % record.findtext(tag_prefix)) - rirs[handles.get(designation, "LEGACY")] |= resource_bag.from_str("%d.0.0.0/8" % prefix) + rirs[handles.get(designation, "legacy")] |= resource_bag.from_str("%d.0.0.0/8" % prefix) for record in 
iterate_xml("ipv6-unicast-address-assignments.xml", tag_record): description = record.findtext(tag_description) @@ -71,7 +71,7 @@ assert all(r in rirs for r, p in erx) erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True) for rir in rirs: - if rir != "LEGACY": + if rir != "legacy": rirs[rir] -= erx_overrides rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True) -- cgit v1.2.3