diff options
author | Rob Austein <sra@hactrn.net> | 2009-07-27 16:03:43 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2009-07-27 16:03:43 +0000 |
commit | 340bc8807e77fe3704591a4ae4654ed32734352b (patch) | |
tree | c5402e1919f5644f6123aa4c5a63058d5a0f9d8d /myrpki/arin-to-csv.py | |
parent | f0c701ada5625566bb8a3fbe1423fe064db0ad39 (diff) |
(Incomplete) script to populate test database.
svn path=/myrpki/arin-to-csv.py; revision=2665
Diffstat (limited to 'myrpki/arin-to-csv.py')
-rw-r--r-- | myrpki/arin-to-csv.py | 111 |
1 files changed, 111 insertions, 0 deletions
# Reconstructed from a patch with the following header:
#   diff --git a/myrpki/arin-to-csv.py b/myrpki/arin-to-csv.py
#   new file mode 100644
#   index 00000000..f448ff1a
#   --- /dev/null
#   +++ b/myrpki/arin-to-csv.py
#   @@ -0,0 +1,111 @@
"""
Parse a WHOIS research dump and write out (just) the RPKI-relevant
fields in myrpki-format CSV syntax.

NB: The input data for this script comes from ARIN under an agreement
that allows research use but forbids redistributing it, so if you
think you need a copy, please talk to ARIN about it, not us.

$Id$

Copyright (C) 2009 Internet Systems Consortium ("ISC")

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
"""

import csv
import gzip


def parseline(line):
    """Split a ``Tag: value`` line into a ``(tag, value)`` pair.

    Only the first colon acts as the separator, so the value may itself
    contain colons.  Both parts are returned with surrounding whitespace
    removed.

    Raises:
        ValueError: if *line* contains no colon at all.
    """
    tag, sep, val = line.partition(":")
    if not sep:
        # Raised explicitly (rather than asserted) so the check still
        # runs when Python is started with -O.
        raise ValueError("Couldn't find separator in %r" % line)
    return tag.strip(), val.strip()


def main(filename="arin_db.txt.gz"):
    """Read the gzip-compressed dump *filename* and print wanted records.

    The input is read as blank-line-separated records made of
    ``Tag: value`` lines; lines starting with ``#`` are skipped.  The
    tag on the first line of a record selects, via the module-level
    ``types`` table, the object that collects the rest of the record.
    Each record is handed to its object's ``finish()`` when the record
    ends (blank line or end of file).

    Raises:
        KeyError: if a record starts with a tag that is not in ``types``.
        ValueError: if a non-blank, non-comment line has no colon.
    """
    # NOTE(review): the dump's character encoding is not visible from
    # this script.  Decoding as UTF-8 with errors="replace" keeps a stray
    # non-UTF-8 byte from aborting the whole run -- confirm against the
    # real data.
    with gzip.open(filename, "rt", encoding="utf-8", errors="replace") as f:
        cur = None
        for line in f:
            line = line.expandtabs().strip()
            if not line:
                # A blank line terminates the current record, if any.
                if cur:
                    cur.finish()
                cur = None
            elif not line.startswith("#"):
                tag, val = parseline(line)
                if cur is None:
                    # First line of a new record: pick its handler.  For
                    # uninteresting record types this yields False, which
                    # is "not None" and therefore makes the remaining
                    # lines of the record fall through untouched.
                    cur = types[tag]()
                if cur:
                    cur.set(tag, val)
        # The last record need not be followed by a blank line.
        if cur:
            cur.finish()


class Handle(object):
    """Base class for one record of the dump.

    Subclasses list the tags they care about in ``want_tags``; the value
    of each such tag is stored as an attribute of the same name.
    """

    # Tags whose values are kept; all other tags are ignored.
    want_tags = ()

    def set(self, tag, val):
        """Store *val* under *tag* if wanted, with all spaces removed."""
        if tag in self.want_tags:
            setattr(self, tag, val.replace(" ", ""))

    def finish(self):
        """Print the record, but only if every wanted tag was seen."""
        if all(hasattr(self, tag) for tag in self.want_tags):
            print(repr(self))
        # TODO: the script is incomplete -- the module docstring promises
        # CSV output.  The original carried a commented-out sketch that
        # appended each finished record to a module-level dict keyed by
        # self.OrgID instead of printing it.


class ASHandle(Handle):
    """Record describing an autonomous system number (range)."""

    want_tags = ("ASHandle", "ASNumber", "OrgID")

    def __repr__(self):
        return (f"<{type(self).__name__} "
                f"{self.OrgID}.{self.ASHandle} {self.ASNumber}>")


class NetHandle(Handle):
    """Record describing an address range.

    Only records whose ``NetType`` is ``allocation`` or ``assignment``
    are reported.
    """

    # Class-level default so finish() can test NetType even when the
    # record never supplied one.
    NetType = None

    want_tags = ("NetHandle", "NetRange", "NetType", "OrgID")

    def finish(self):
        """Report the record only for allocations and assignments."""
        if self.NetType in ("allocation", "assignment"):
            super().finish()

    def __repr__(self):
        return (f"<{type(self).__name__} "
                f"{self.OrgID}.{self.NetHandle} "
                f"{self.NetType} {self.NetRange}>")


class V6NetHandle(NetHandle):
    """Same as NetHandle, for records keyed by a ``V6NetHandle`` tag."""

    want_tags = ("V6NetHandle", "NetRange", "NetType", "OrgID")

    def __repr__(self):
        return (f"<{type(self).__name__} "
                f"{self.OrgID}.{self.V6NetHandle} "
                f"{self.NetType} {self.NetRange}>")


def DontBother():
    """Handler factory for record types that are skipped entirely.

    Returns False rather than None: main() treats None as "no record in
    progress", so a falsy non-None value marks "record in progress, but
    ignore it".
    """
    return False


# Maps the first tag of a record to the factory for its handler.
types = {
    "ASHandle": ASHandle,
    "NetHandle": NetHandle,
    "V6NetHandle": V6NetHandle,
    "POCHandle": DontBother,
    "OrgID": DontBother,
}

if __name__ == "__main__":
    main()