From bd451c90f19c10c899a75795366d98da9977ef56 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sat, 2 Oct 2010 04:06:40 +0000 Subject: Updates to work with ARIN's new bulk-WHOIS service. ARIN now considers RPSL to be a legacy format, and the XSL that generates RPSL from their XML primary dump format doesn't normalize its output properly, so we get parse errors wherever somebody inserted, e.g., newlines into contact phone numbers. For the moment, assume that we don't care about any line which fails to parse, which is a really bad assumption but suffices to get updates running again with the new input files. In the longer term, this script should be replaced by XSL that generates our CSV files directly from XML, bypassing RPSL entirely. svn path=/scripts/arin-to-csv.py; revision=3454 --- scripts/arin-to-csv.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'scripts') diff --git a/scripts/arin-to-csv.py b/scripts/arin-to-csv.py index 9939e7e9..2f1a3a14 100644 --- a/scripts/arin-to-csv.py +++ b/scripts/arin-to-csv.py @@ -23,7 +23,7 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
""" -import gzip, rpki.myrpki +import rpki.myrpki class Handle(object): @@ -88,12 +88,6 @@ class main(object): translations = {} - @staticmethod - def parseline(line): - tag, sep, val = line.partition(":") - assert sep, "Couldn't find separator in %r" % line - return tag.strip(), val.strip() - def __init__(self): self.asns = rpki.myrpki.csv_writer("asns.csv") self.prefixes = rpki.myrpki.csv_writer("prefixes.csv") @@ -101,7 +95,7 @@ class main(object): self.translations = dict((src, dst) for src, dst in rpki.myrpki.csv_reader("translations.csv", columns = 2)) except IOError: pass - f = gzip.open("arin_db.txt.gz") + f = open("arin_db.txt") cur = None for line in f: line = line.expandtabs().strip() @@ -110,7 +104,10 @@ class main(object): cur.finish(self) cur = None elif not line.startswith("#"): - tag, val = self.parseline(line) + tag, sep, val = tuple(s.strip() for s in line.partition(":")) + if not sep: + # This should not happen, but ARIN's "legacy" RPSL contains errors + continue if cur is None: cur = self.types[tag]() if tag in self.types else False if cur: -- cgit v1.2.3