aboutsummaryrefslogtreecommitdiff
path: root/potpourri/whack-ripe-prefixes.py
diff options
context:
space:
mode:
Diffstat (limited to 'potpourri/whack-ripe-prefixes.py')
-rw-r--r--potpourri/whack-ripe-prefixes.py101
1 files changed, 101 insertions, 0 deletions
diff --git a/potpourri/whack-ripe-prefixes.py b/potpourri/whack-ripe-prefixes.py
new file mode 100644
index 00000000..52ea3f18
--- /dev/null
+++ b/potpourri/whack-ripe-prefixes.py
@@ -0,0 +1,101 @@
+# $Id$
+#
+# Copyright (C) 2010 Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+"""
+Fix problems in prefixes.csv generated from RIPE's database.
+
+RIPE's database contains inconsistancies, overlaps, and format errors
+that make it impossible to feed the output of ripe-to-csv.awk directly
+into testbed-rootcert.py without OpenSSL rejecting the resulting
+root.conf. This script takes a brute force approach to fixing this:
+it converts all prefixes and address ranges into pairs of unsigned
+decimal integers representing range min and range max, runs the
+resulting 3+ million entry file through the unix sort program to put
+the data into canonical order, then reads it back, merging overlaps
+and converting everything back to ranges of IP addresses, and writing
+the result in a form acceptable to testbed-rootcert.py.
+
+Since we're doing all this anyway, the script also merges adjacent
+address blocks, not because OpenSSL can't handle them (it can) but
+because doing so cuts out a lot of unnecessary I/O.
+
+Ordinarily, it would be dangerous to have the same program act as both
+the source and sink of a pipeline, particularly for such a large data
+set, as the chance of deadlock would approach 100%, but in this case
+we know that the sort program must consume and buffer (somehow) all of
+its input before writing a single line of output, so a single script
+can safely act as a filter both before and after sort.
+"""
+
+import sys
+import subprocess
+import rpki.ipaddrs
+
+sorter = subprocess.Popen(("sort", "-T.", "-n"),
+ stdin = subprocess.PIPE,
+ stdout = subprocess.PIPE)
+
+for line in sys.stdin:
+ handle, prefix = line.split()
+
+ if "-" in prefix:
+ range_min, range_max = prefix.split("-")
+ range_min = rpki.ipaddrs.parse(range_min)
+ range_max = rpki.ipaddrs.parse(range_max)
+
+ else:
+ address, length = prefix.split("/")
+ address = rpki.ipaddrs.parse(address)
+ mask = (1L << (address.bits - int(length))) - 1
+ range_min = address & ~mask
+ range_max = address | mask
+
+ sorter.stdin.write("%d %d\n" % (long(range_min), long(range_max)))
+
+sorter.stdin.close()
+
+prev_min = None
+prev_max = None
+
+def address(number):
+ if number > 0xffffffff:
+ return rpki.ipaddrs.v6addr(number)
+ else:
+ return rpki.ipaddrs.v4addr(number)
+
+def show():
+ if prev_min and prev_max:
+ sys.stdout.write("x\t%s-%s\n" % (address(prev_min), address(prev_max)))
+
+for line in sorter.stdout:
+ this_min, this_max = line.split()
+ this_min = long(this_min)
+ this_max = long(this_max)
+
+ if prev_min and prev_max and prev_max + 1 >= this_min:
+ prev_min = min(prev_min, this_min)
+ prev_max = max(prev_max, this_max)
+
+ else:
+ show()
+ prev_min = this_min
+ prev_max = this_max
+
+show()
+
+sorter.stdout.close()
+
+sys.exit(sorter.wait())