diff options
author | Rob Austein <sra@hactrn.net> | 2014-04-05 22:42:12 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2014-04-05 22:42:12 +0000 |
commit | fe0bf509f528dbdc50c7182f81057c6a4e15e4bd (patch) | |
tree | 07c9a923d4a0ccdfea11c49cd284f6d5757c5eda /scripts/arin-to-csv.py | |
parent | aa28ef54c271fbe4d52860ff8cf13cab19e2207c (diff) |
Source tree reorg, phase 1. Almost everything moved, no file contents changed.
svn path=/branches/tk685/; revision=5757
Diffstat (limited to 'scripts/arin-to-csv.py')
-rw-r--r-- | scripts/arin-to-csv.py | 114 |
1 files changed, 0 insertions, 114 deletions
diff --git a/scripts/arin-to-csv.py b/scripts/arin-to-csv.py deleted file mode 100644 index a4e7ffc3..00000000 --- a/scripts/arin-to-csv.py +++ /dev/null @@ -1,114 +0,0 @@ -# $Id$ -# -# Copyright (C) 2009-2012 Internet Systems Consortium ("ISC") -# -# Permission to use, copy, modify, and distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH -# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, -# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. - -""" -Parse an ARIN database research dump and write out (just) the -RPKI-relevant fields in myrpki-format CSV syntax. - -NB: The input data for this script comes from ARIN under an agreement -that allows research use but forbids redistribution, so if you think -you need a copy of the data, please talk to ARIN about it, not us. - -Input format used to be RPSL WHOIS dump, but ARIN recently went Java, -so we have to parse a 3.5GB XML "document". Credit to Liza Daly for -explaining the incantations needed to convince lxml to do this nicely, -see: http://www.ibm.com/developerworks/xml/library/x-hiperfparse/ -""" - -import sys -import lxml.etree - -from rpki.csv_utils import csv_writer - -def ns(tag): - return "{http://www.arin.net/bulkwhois/core/v1}" + tag - -tag_asn = ns("asn") -tag_net = ns("net") -tag_org = ns("org") -tag_poc = ns("poc") -tag_orgHandle = ns("orgHandle") -tag_netBlock = ns("netBlock") -tag_type = ns("type") -tag_startAddress = ns("startAddress") -tag_endAddress = ns("endAddress") -tag_startAsNumber = ns("startAsNumber") -tag_endAsNumber = ns("endAsNumber") - -def find(node, tag): - return node.findtext(tag).strip() - -def do_asn(node): - asns.writerow((find(node, tag_orgHandle), - "%s-%s" % (find(node, tag_startAsNumber), - find(node, tag_endAsNumber)))) - -erx_table = { - "AF" : "afrinic", - "AP" : "apnic", - "AR" : "arin", - "AV" : "arin", - "FX" : "afrinic", - "LN" : "lacnic", - "LX" : "lacnic", - "PV" : "apnic", - "PX" : "apnic", - "RN" : "ripe", - "RV" : "ripe", - "RX" : "ripe" } - -def do_net(node): - handle = find(node, tag_orgHandle) - for netblock in node.iter(tag_netBlock): - tag = find(netblock, tag_type) - startAddress = find(netblock, tag_startAddress) - endAddress = find(netblock, tag_endAddress) - if not startAddress.endswith(".000") and not startAddress.endswith(":0000"): - continue - if not endAddress.endswith(".255") and not endAddress.endswith(":FFFF"): - continue - if tag in ("DS", "DA", "IU"): - prefixes.writerow((handle, "%s-%s" % (startAddress, endAddress))) - elif tag in erx_table: - erx.writerow((erx_table[tag], "%s-%s" % (startAddress, endAddress))) - -dispatch = { tag_asn : do_asn, tag_net : do_net } - -asns = csv_writer("asns.csv") -prefixes = csv_writer("prefixes.csv") -erx = csv_writer("erx.csv") - -root = None - -for event, node in lxml.etree.iterparse(sys.stdin): - - if root is None: - root = node - while root.getparent() is not None: - root = root.getparent() - - if node.getparent() is root: - - if node.tag in dispatch: - dispatch[node.tag](node) - - node.clear() - while node.getprevious() is not None: - del node.getparent()[0] - -asns.close() -prefixes.close() -erx.close() |