aboutsummaryrefslogtreecommitdiff
path: root/myrpki/arin-to-csv.py
diff options
context:
space:
mode:
authorRob Austein <sra@hactrn.net>2009-07-27 16:03:43 +0000
committerRob Austein <sra@hactrn.net>2009-07-27 16:03:43 +0000
commit340bc8807e77fe3704591a4ae4654ed32734352b (patch)
treec5402e1919f5644f6123aa4c5a63058d5a0f9d8d /myrpki/arin-to-csv.py
parentf0c701ada5625566bb8a3fbe1423fe064db0ad39 (diff)
(Incomplete) script populate test database.
svn path=/myrpki/arin-to-csv.py; revision=2665
Diffstat (limited to 'myrpki/arin-to-csv.py')
-rw-r--r--myrpki/arin-to-csv.py111
1 files changed, 111 insertions, 0 deletions
diff --git a/myrpki/arin-to-csv.py b/myrpki/arin-to-csv.py
new file mode 100644
index 00000000..f448ff1a
--- /dev/null
+++ b/myrpki/arin-to-csv.py
@@ -0,0 +1,111 @@
+"""
+Parse a WHOIS research dump and write out (just) the RPKI-relevant
+fields in myrpki-format CSV syntax.
+
+NB: The input data for this script comes from ARIN under an agreement
+that allows research use but forbids redistributing it, so if you
+think you need a copy, please talk to ARIN about it, not us.
+
+$Id$
+
+Copyright (C) 2009 Internet Systems Consortium ("ISC")
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+"""
+
+import gzip, csv
+
def parseline(line):
    """
    Split one "Tag: value" line of the WHOIS dump into its two halves.

    The split is made at the first colon only, so a value that itself
    contains colons is returned intact.  Leading and trailing whitespace
    is removed from both halves.

    Returns a (tag, value) tuple of strings.

    Raises AssertionError if the line contains no colon at all.
    """
    tag, sep, val = line.partition(":")
    # Raised explicitly instead of through an assert statement so that the
    # check is still performed when Python runs with -O.
    if not sep:
        raise AssertionError("Couldn't find separator in %r" % line)
    return tag.strip(), val.strip()
+
def main():
    """
    Read arin_db.txt.gz from the current directory and process each record.

    The dump is treated as a sequence of records separated by blank
    lines.  Lines starting with "#" are skipped.  The tag on the first
    line of a record selects, through the module-level types table, the
    factory used to build the object for that record; every tagged line
    of the record (including the first) is then handed to that object's
    set() method, and finish() is called when the record ends.

    A factory may return a false value to say the record should be
    ignored; the remaining lines of that record are then parsed but
    otherwise dropped.

    Raises KeyError if a record starts with a tag that is not present in
    types, and whatever parseline() raises for a line without a colon.
    """
    f = gzip.open("arin_db.txt.gz")
    # try/finally rather than a with statement: it releases the file on
    # every exit path and does not depend on GzipFile being a context
    # manager in the running Python version.
    try:
        cur = None
        for line in f:
            line = line.expandtabs().strip()
            if not line:
                # A blank line ends the current record.
                if cur:
                    cur.finish()
                # Reset even when the record was being ignored (cur is a
                # false value other than None), so that the next record
                # gets looked up in types again.
                cur = None
            elif not line.startswith("#"):
                tag, val = parseline(line)
                if cur is None:
                    cur = types[tag]()
                if cur:
                    cur.set(tag, val)
        # The dump may end without a trailing blank line.
        if cur:
            cur.finish()
    finally:
        f.close()
+
+#db = {}
+
class Handle(object):
    """
    Base class for one record collected from the WHOIS dump.

    Subclasses list the tags they care about in want_tags and supply a
    __repr__ that formats the collected values.
    """

    # Names of the tags this record type keeps; each one becomes an
    # instance attribute of the same name once seen.
    want_tags = ()

    def set(self, tag, val):
        """
        Remember val under tag if tag is one of want_tags.

        Every space character is removed from the value before it is
        stored.  Tags not listed in want_tags are ignored.
        """
        if tag in self.want_tags:
            setattr(self, tag, val.replace(" ", ""))

    def finish(self):
        """
        Print repr(self), but only if every tag in want_tags is present.

        A record with any wanted tag missing produces no output.
        """
        for tag in self.want_tags:
            if not hasattr(self, tag):
                return
        # Parenthesised single-argument form: prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print(repr(self))
#       if self.OrgID not in db:
#           db[self.OrgID] = []
#       db[self.OrgID].append(self)
+
class ASHandle(Handle):
    """
    Record built from the ASHandle, ASNumber and OrgID tags.
    """

    want_tags = ("ASHandle", "ASNumber", "OrgID")

    def __repr__(self):
        """
        Format as "<ClassName OrgID.ASHandle ASNumber>".
        """
        fields = (self.__class__.__name__,
                  self.OrgID,
                  self.ASHandle,
                  self.ASNumber)
        return "<%s %s.%s %s>" % fields
+
class NetHandle(Handle):
    """
    Record built from the NetHandle, NetRange, NetType and OrgID tags.

    Only records whose NetType is "allocation" or "assignment" are
    reported.
    """

    # Class-level default so that finish() can read NetType even when the
    # record never supplied one.
    NetType = None

    want_tags = ("NetHandle", "NetRange", "NetType", "OrgID")

    def finish(self):
        """
        Pass on to Handle.finish() only for the accepted NetType values.
        """
        if self.NetType not in ("allocation", "assignment"):
            return
        Handle.finish(self)

    def __repr__(self):
        """
        Format as "<ClassName OrgID.NetHandle NetType NetRange>".
        """
        fields = (self.__class__.__name__,
                  self.OrgID,
                  self.NetHandle,
                  self.NetType,
                  self.NetRange)
        return "<%s %s.%s %s %s>" % fields
class V6NetHandle(NetHandle):
    """
    Variant of NetHandle whose identifying tag is V6NetHandle.

    The NetType filtering in finish() is inherited from NetHandle.
    """

    want_tags = ("V6NetHandle", "NetRange", "NetType", "OrgID")

    def __repr__(self):
        """
        Format as "<ClassName OrgID.V6NetHandle NetType NetRange>".
        """
        fields = (self.__class__.__name__,
                  self.OrgID,
                  self.V6NetHandle,
                  self.NetType,
                  self.NetRange)
        return "<%s %s.%s %s %s>" % fields
+
def DontBother():
    """
    Stand-in factory for record types that should be skipped.

    Always returns False, so the caller gets a false value where it
    would otherwise get a record object.
    """
    return False
+
# Maps the tag found on the first line of a record to the callable that
# builds the object for that record.  DontBother marks record types that
# are read past without being collected.
types = {
    "ASHandle": ASHandle,
    "NetHandle": NetHandle,
    "V6NetHandle": V6NetHandle,
    "POCHandle": DontBother,
    "OrgID": DontBother,
}

# Run only when executed as a script, so that loading this file from
# other code does not immediately try to open the dump file.
if __name__ == "__main__":
    main()