From 0bf7f83eadf95a5297468e68983140c606371dc0 Mon Sep 17 00:00:00 2001
From: Rob Austein <sra@hactrn.net>
Date: Wed, 2 May 2012 21:18:08 +0000
Subject: First cut at hack to dump all relevant IANA data to CSV.  Output
 still needs to be merged with ERX data from ARIN's database, as IANA only
 deals with /8s in IPv4 space.  Haven't figured out yet whether that should be
 a separate script or not.  See #33.

svn path=/branches/tk33/; revision=4460
---
 scripts/iana-to-csv.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 scripts/iana-to-csv.py

(limited to 'scripts/iana-to-csv.py')

diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py
new file mode 100644
index 00000000..dd524d8c
--- /dev/null
+++ b/scripts/iana-to-csv.py
@@ -0,0 +1,64 @@
+"""
+Parse IANA XML data.
+
+$Id$
+
+Copyright (C) 2010-2012  Internet Systems Consortium ("ISC")
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+"""
+
+import sys
+import lxml.etree
+from rpki.csv_utils import csv_writer
+
+def iterate_xml(filename, tag):
+  return lxml.etree.parse(filename).getroot().getiterator(tag)
+
+def ns(tag):
+  return "{http://www.iana.org/assignments}" + tag
+
+tag_description = ns("description")
+tag_designation = ns("designation")
+tag_record      = ns("record")
+tag_number      = ns("number")
+tag_prefix      = ns("prefix")
+
+handles = {}
+
+for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"):
+  handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = rir.split()[0].upper()
+
+asns     = csv_writer("asns.csv")
+prefixes = csv_writer("prefixes.csv")
+
+for record in iterate_xml("as-numbers.xml", tag_record):
+  description = record.findtext(tag_description)
+  if description in handles:
+    asns.writerow((handles[description], record.findtext(tag_number)))
+    
+for record in iterate_xml("ipv4-address-space.xml", tag_record):
+  designation = record.findtext(tag_designation)
+  if designation in handles:
+    prefix = record.findtext(tag_prefix)
+    p, l = prefix.split("/")
+    assert l == "8", "Violated /8 assumption: %r" % prefix
+    prefixes.writerow((handles[designation], "%d.0.0.0/8" % int(p)))
+    
+for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record):
+  description = record.findtext(tag_description)
+  if record.findtext(tag_description) in handles:
+    prefixes.writerow((handles[description], record.findtext(tag_prefix)))
+
+asns.close()
+prefixes.close()
-- 
cgit v1.2.3


From 344c3f6a9fc40cff4d6c3ae31d35158aacb214f8 Mon Sep 17 00:00:00 2001
From: Rob Austein <sra@hactrn.net>
Date: Thu, 3 May 2012 03:30:56 +0000
Subject: Merge in ERX data.  See #33.

svn path=/branches/tk33/; revision=4462
---
 scripts/iana-to-csv.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'scripts/iana-to-csv.py')

diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py
index dd524d8c..2bfe2bf8 100644
--- a/scripts/iana-to-csv.py
+++ b/scripts/iana-to-csv.py
@@ -20,7 +20,8 @@ PERFORMANCE OF THIS SOFTWARE.
 
 import sys
 import lxml.etree
-from rpki.csv_utils import csv_writer
+from rpki.csv_utils import csv_reader, csv_writer
+from rpki.resource_set import resource_bag
 
 def iterate_xml(filename, tag):
   return lxml.etree.parse(filename).getroot().getiterator(tag)
@@ -35,9 +36,12 @@ tag_number      = ns("number")
 tag_prefix      = ns("prefix")
 
 handles = {}
+rirs = {}
 
 for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"):
-  handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = rir.split()[0].upper()
+  handle = rir.split()[0].upper()
+  handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = handle
+  rirs[handle] = resource_bag()
 
 asns     = csv_writer("asns.csv")
 prefixes = csv_writer("prefixes.csv")
@@ -53,12 +57,27 @@ for record in iterate_xml("ipv4-address-space.xml", tag_record):
     prefix = record.findtext(tag_prefix)
     p, l = prefix.split("/")
     assert l == "8", "Violated /8 assumption: %r" % prefix
-    prefixes.writerow((handles[designation], "%d.0.0.0/8" % int(p)))
+    rirs[handles[designation]] |= resource_bag.from_str("%d.0.0.0/8" % int(p))
     
 for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record):
   description = record.findtext(tag_description)
   if record.findtext(tag_description) in handles:
-    prefixes.writerow((handles[description], record.findtext(tag_prefix)))
+    rirs[handles[description]] |= resource_bag.from_str(record.findtext(tag_prefix))
+
+erx = list(csv_reader("erx.csv"))
+assert all(r in rirs for r, p in erx)
+
+erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True)
+
+for rir in rirs:
+  rirs[rir] -= erx_overrides
+  rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True)
+
+for rir, bag in rirs.iteritems():
+  for p in bag.v4:
+    prefixes.writerow((rir, p))
+  for p in bag.v6:
+    prefixes.writerow((rir, p))
 
 asns.close()
 prefixes.close()
-- 
cgit v1.2.3


From 84c82bea2b5c77202e5712511e9476a54c83e67a Mon Sep 17 00:00:00 2001
From: Rob Austein <sra@hactrn.net>
Date: Thu, 3 May 2012 17:39:35 +0000
Subject: Handle legacy via sixth pseudo-RIR named "LEGACY," at least for now.

svn path=/branches/tk33/; revision=4463
---
 scripts/iana-to-csv.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'scripts/iana-to-csv.py')

diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py
index 2bfe2bf8..a9aee0b9 100644
--- a/scripts/iana-to-csv.py
+++ b/scripts/iana-to-csv.py
@@ -34,9 +34,10 @@ tag_designation = ns("designation")
 tag_record      = ns("record")
 tag_number      = ns("number")
 tag_prefix      = ns("prefix")
+tag_status      = ns("status")
 
 handles = {}
-rirs = {}
+rirs = { "LEGACY" : resource_bag() }
 
 for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"):
   handle = rir.split()[0].upper()
@@ -53,12 +54,12 @@ for record in iterate_xml("as-numbers.xml", tag_record):
     
 for record in iterate_xml("ipv4-address-space.xml", tag_record):
   designation = record.findtext(tag_designation)
-  if designation in handles:
-    prefix = record.findtext(tag_prefix)
-    p, l = prefix.split("/")
-    assert l == "8", "Violated /8 assumption: %r" % prefix
-    rirs[handles[designation]] |= resource_bag.from_str("%d.0.0.0/8" % int(p))
-    
+  if record.findtext(tag_status) != "RESERVED":
+    prefix, prefixlen = [int(i) for i in record.findtext(tag_prefix).split("/")]
+    if prefixlen != 8:
+      raise ValueError("%s violated /8 assumption" % record.findtext(tag_prefix))
+    rirs[handles.get(designation, "LEGACY")] |= resource_bag.from_str("%d.0.0.0/8" % prefix)
+
 for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record):
   description = record.findtext(tag_description)
   if record.findtext(tag_description) in handles:
@@ -70,8 +71,9 @@ assert all(r in rirs for r, p in erx)
 erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True)
 
 for rir in rirs:
-  rirs[rir] -= erx_overrides
-  rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True)
+  if rir != "LEGACY":
+    rirs[rir] -= erx_overrides
+    rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True)
 
 for rir, bag in rirs.iteritems():
   for p in bag.v4:
-- 
cgit v1.2.3


From c151cd5c3405b73279809b9107651d5bfbc21338 Mon Sep 17 00:00:00 2001
From: Rob Austein <sra@hactrn.net>
Date: Sat, 5 May 2012 06:08:16 +0000
Subject: Use lowercase handle names for RIRs, for consistency with older code.

svn path=/branches/tk33/; revision=4469
---
 scripts/iana-to-csv.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'scripts/iana-to-csv.py')

diff --git a/scripts/iana-to-csv.py b/scripts/iana-to-csv.py
index a9aee0b9..ee8c53d1 100644
--- a/scripts/iana-to-csv.py
+++ b/scripts/iana-to-csv.py
@@ -37,10 +37,10 @@ tag_prefix      = ns("prefix")
 tag_status      = ns("status")
 
 handles = {}
-rirs = { "LEGACY" : resource_bag() }
+rirs = { "legacy" : resource_bag() }
 
 for rir in ("AfriNIC", "APNIC", "ARIN", "LACNIC", "RIPE NCC"):
-  handle = rir.split()[0].upper()
+  handle = rir.split()[0].lower()
   handles[rir] = handles["Assigned by %s" % rir] = handles["Administered by %s" % rir] = handle
   rirs[handle] = resource_bag()
 
@@ -58,7 +58,7 @@ for record in iterate_xml("ipv4-address-space.xml", tag_record):
     prefix, prefixlen = [int(i) for i in record.findtext(tag_prefix).split("/")]
     if prefixlen != 8:
       raise ValueError("%s violated /8 assumption" % record.findtext(tag_prefix))
-    rirs[handles.get(designation, "LEGACY")] |= resource_bag.from_str("%d.0.0.0/8" % prefix)
+    rirs[handles.get(designation, "legacy")] |= resource_bag.from_str("%d.0.0.0/8" % prefix)
 
 for record in iterate_xml("ipv6-unicast-address-assignments.xml", tag_record):
   description = record.findtext(tag_description)
@@ -71,7 +71,7 @@ assert all(r in rirs for r, p in erx)
 erx_overrides = resource_bag.from_str(",".join(p for r, p in erx), allow_overlap = True)
 
 for rir in rirs:
-  if rir != "LEGACY":
+  if rir != "legacy":
     rirs[rir] -= erx_overrides
     rirs[rir] |= resource_bag.from_str(",".join(p for r, p in erx if r == rir), allow_overlap = True)
 
-- 
cgit v1.2.3