Cleanup

svn path=/myrpki/arin-to-csv.py; revision=2666
author: Rob Austein <sra@hactrn.net> 2009-07-27 17:09:06 +0000
committer: Rob Austein <sra@hactrn.net> 2009-07-27 17:09:06 +0000
commit: ee2acb5dc9428b42aa84850af06ebe285a8eb6ca (patch)
tree: f3cf46dcc0d2a7782b4638ffcf8cad7f8a03df62
parent: 340bc8807e77fe3704591a4ae4654ed32734352b (diff)
3 files changed, 39 insertions, 46 deletions
diff --git a/myrpki/arin-to-csv.py b/myrpki/arin-to-csv.py
index f448ff1a..ca622ccf 100644
--- a/myrpki/arin-to-csv.py
+++ b/myrpki/arin-to-csv.py
@@ -3,8 +3,8 @@ Parse a WHOIS research dump and write out (just) the RPKI-relevant
 fields in myrpki-format CSV syntax.
 
 NB: The input data for this script comes from ARIN under an agreement
-that allows research use but forbids redistribution it, so if you
-think you need a copy, please talk to ARIN about it, not us.
+that allows research use but forbids redistribution, so if you think
+you need a copy of the data, please talk to ARIN about it, not us.
 
 $Id$
 
@@ -23,32 +23,7 @@ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 PERFORMANCE OF THIS SOFTWARE.
 """
 
-import gzip, csv
-
-def parseline(line):
-    tag, sep, val = line.partition(":")
-    assert sep, "Couldn't find separator in %r" % line
-    return tag.strip(), val.strip()
-
-def main():
-    f = gzip.open("arin_db.txt.gz")
-    cur = None
-    for line in f:
-        line = line.expandtabs().strip()
-        if not line:
-            if cur:
-                cur.finish()
-            cur = None
-        elif not line.startswith("#"):
-            tag, val = parseline(line)
-            if cur is None:
-                cur = types[tag]()
-            if cur:
-                cur.set(tag, val)
-    if cur:
-        cur.finish()
-
-#db = {}
+import gzip, csv, myrpki
 
 class Handle(object):
 
@@ -63,9 +38,6 @@ class Handle(object):
             if not hasattr(self, tag):
                 return
         print repr(self)
-#       if self.OrgID not in db:
-#           db[self.OrgID] = []
-#       db[self.OrgID].append(self)
 
 class ASHandle(Handle):
 
@@ -98,14 +70,37 @@ class V6NetHandle(NetHandle):
                                      self.OrgID, self.V6NetHandle,
                                      self.NetType, self.NetRange)
 
-def DontBother():
-    return False
-
 types = {
     "ASHandle"    : ASHandle,
     "NetHandle"   : NetHandle,
-    "V6NetHandle" : V6NetHandle,
-    "POCHandle"   : DontBother,
-    "OrgID"       : DontBother }
+    "V6NetHandle" : V6NetHandle }
+
+def parseline(line):
+    tag, sep, val = line.partition(":")
+    assert sep, "Couldn't find separator in %r" % line
+    return tag.strip(), val.strip()
+
+def csvout(fn):  # fn: name of the CSV file to create; returns a csv.writer using the shared myrpki dialect
+    return csv.writer(open(fn, "w"), dialect = myrpki.csv_dialect)
+
+def main():
+    f = gzip.open("arin_db.txt.gz")
+    cur = None
+    asn_csv = csvout("asns.csv")
+    prefix_csv = csvout("prefixes.csv")
+    for line in f:
+        line = line.expandtabs().strip()
+        if not line:
+            if cur:
+                cur.finish()
+            cur = None
+        elif not line.startswith("#"):
+            tag, val = parseline(line)
+            if cur is None:
+                cur = types[tag]() if tag in types else False
+            if cur:
+                cur.set(tag, val)
+    if cur:
+        cur.finish()
 
 main()
diff --git a/myrpki/myrpki.py b/myrpki/myrpki.py
index dbe72b10..504137aa 100644
--- a/myrpki/myrpki.py
+++ b/myrpki/myrpki.py
@@ -57,14 +57,12 @@ from xml.etree.ElementTree import Element, SubElement, ElementTree
 
 namespace = "http://www.hactrn.net/uris/rpki/myrpki/"
 
-# Dialect parameters for our use of CSV files, here to make them easy
-# to change if your site needs to do something different.  See doc for
-# the csv module in the Python standard libraries for details if you
-# need to customize these.
-
-csv_delimiter = "\t"
-csv_dialect = None
+# Dialect for our use of CSV files, here to make it easy to change if
+# your site needs to do something different.  See doc for the csv
+# module in the Python standard libraries for details if you need to
+# customize this.
 
+csv_dialect = csv.get_dialect("excel-tab")
 
 class comma_set(set):
   """
@@ -304,7 +302,7 @@ def csv_open(filename):
   You may need to tweak this function for your environment, see the
   csv module in the Python standard libraries for details.
   """
-  return csv.reader(open(filename, "rb"), dialect = csv_dialect, delimiter = csv_delimiter)
+  return csv.reader(open(filename, "rb"), dialect = csv_dialect)
 
 def PEMElement(e, tag, filename):
   """
diff --git a/myrpki/yamltest.py b/myrpki/yamltest.py
index e6c1278a..f1c34b06 100644
--- a/myrpki/yamltest.py
+++ b/myrpki/yamltest.py
@@ -293,7 +293,7 @@ class allocation(object):
     """
     path = self.path(fn)
     print "Writing", path
-    return csv.writer(open(path, "wb"), delimiter = myrpki.csv_delimiter, dialect = myrpki.csv_dialect)
+    return csv.writer(open(path, "w"), dialect = myrpki.csv_dialect)
 
   def up_down_url(self):
     """
author	Rob Austein <sra@hactrn.net>	2009-07-27 17:09:06 +0000
committer	Rob Austein <sra@hactrn.net>	2009-07-27 17:09:06 +0000
commit	ee2acb5dc9428b42aa84850af06ebe285a8eb6ca (patch)
tree	f3cf46dcc0d2a7782b4638ffcf8cad7f8a03df62
parent	340bc8807e77fe3704591a4ae4654ed32734352b (diff)