diff options
author | Rob Austein <sra@hactrn.net> | 2009-07-27 17:09:06 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2009-07-27 17:09:06 +0000 |
commit | ee2acb5dc9428b42aa84850af06ebe285a8eb6ca (patch) | |
tree | f3cf46dcc0d2a7782b4638ffcf8cad7f8a03df62 | |
parent | 340bc8807e77fe3704591a4ae4654ed32734352b (diff) |
Cleanup
svn path=/myrpki/arin-to-csv.py; revision=2666
-rw-r--r-- | myrpki/arin-to-csv.py | 69 | ||||
-rw-r--r-- | myrpki/myrpki.py | 14 | ||||
-rw-r--r-- | myrpki/yamltest.py | 2 |
3 files changed, 39 insertions, 46 deletions
diff --git a/myrpki/arin-to-csv.py b/myrpki/arin-to-csv.py index f448ff1a..ca622ccf 100644 --- a/myrpki/arin-to-csv.py +++ b/myrpki/arin-to-csv.py @@ -3,8 +3,8 @@ Parse a WHOIS research dump and write out (just) the RPKI-relevant fields in myrpki-format CSV syntax. NB: The input data for this script comes from ARIN under an agreement -that allows research use but forbids redistribution it, so if you -think you need a copy, please talk to ARIN about it, not us. +that allows research use but forbids redistribution, so if you think +you need a copy of the data, please talk to ARIN about it, not us. $Id$ @@ -23,32 +23,7 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. """ -import gzip, csv - -def parseline(line): - tag, sep, val = line.partition(":") - assert sep, "Couldn't find separator in %r" % line - return tag.strip(), val.strip() - -def main(): - f = gzip.open("arin_db.txt.gz") - cur = None - for line in f: - line = line.expandtabs().strip() - if not line: - if cur: - cur.finish() - cur = None - elif not line.startswith("#"): - tag, val = parseline(line) - if cur is None: - cur = types[tag]() - if cur: - cur.set(tag, val) - if cur: - cur.finish() - -#db = {} +import gzip, csv, myrpki class Handle(object): @@ -63,9 +38,6 @@ class Handle(object): if not hasattr(self, tag): return print repr(self) -# if self.OrgID not in db: -# db[self.OrgID] = [] -# db[self.OrgID].append(self) class ASHandle(Handle): @@ -98,14 +70,37 @@ class V6NetHandle(NetHandle): self.OrgID, self.V6NetHandle, self.NetType, self.NetRange) -def DontBother(): - return False - types = { "ASHandle" : ASHandle, "NetHandle" : NetHandle, - "V6NetHandle" : V6NetHandle, - "POCHandle" : DontBother, - "OrgID" : DontBother } + "V6NetHandle" : V6NetHandle } + +def parseline(line): + tag, sep, val = line.partition(":") + assert sep, "Couldn't find separator in %r" % line + return tag.strip(), val.strip() + +def csvout(fn): + return csv.writer(open(path, "w"), dialect = myrpki.csv_dialect) + +def main(): + f = gzip.open("arin_db.txt.gz") + cur = None + asn_csv = csvout("asns.csv") + prefix_csv = csvout("prefixes.csv") + for line in f: + line = line.expandtabs().strip() + if not line: + if cur: + cur.finish() + cur = None + elif not line.startswith("#"): + tag, val = parseline(line) + if cur is None: + cur = types[tag]() if tag in types else False + if cur: + cur.set(tag, val) + if cur: + cur.finish() main() diff --git a/myrpki/myrpki.py b/myrpki/myrpki.py index dbe72b10..504137aa 100644 --- a/myrpki/myrpki.py +++ b/myrpki/myrpki.py @@ -57,14 +57,12 @@ from xml.etree.ElementTree import Element, SubElement, ElementTree namespace = "http://www.hactrn.net/uris/rpki/myrpki/" -# Dialect parameters for our use of CSV files, here to make them easy -# to change if your site needs to do something different. See doc for -# the csv module in the Python standard libraries for details if you -# need to customize these. - -csv_delimiter = "\t" -csv_dialect = None +# Dialect for our use of CSV files, here to make it easy to change if +# your site needs to do something different. See doc for the csv +# module in the Python standard libraries for details if you need to +# customize this. +csv_dialect = csv.get_dialect("excel-tab") class comma_set(set): """ @@ -304,7 +302,7 @@ def csv_open(filename): You may need to tweak this function for your environment, see the csv module in the Python standard libraries for details. """ - return csv.reader(open(filename, "rb"), dialect = csv_dialect, delimiter = csv_delimiter) + return csv.reader(open(filename, "rb"), dialect = csv_dialect) def PEMElement(e, tag, filename): """ diff --git a/myrpki/yamltest.py b/myrpki/yamltest.py index e6c1278a..f1c34b06 100644 --- a/myrpki/yamltest.py +++ b/myrpki/yamltest.py @@ -293,7 +293,7 @@ class allocation(object): """ path = self.path(fn) print "Writing", path - return csv.writer(open(path, "wb"), delimiter = myrpki.csv_delimiter, dialect = myrpki.csv_dialect) + return csv.writer(open(path, "w"), dialect = myrpki.csv_dialect) def up_down_url(self): """ |