aboutsummaryrefslogtreecommitdiff
path: root/myrpki/ripe-to-csv.py
blob: e700e6691ba7efa77e5db6d0978f69ab0be94484 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
Parse a WHOIS research dump and write out (just) the RPKI-relevant
fields in myrpki-format CSV syntax.

NB: The input data for this script is publicly available via FTP, but
you'll have to fetch the data from RIPE yourself, and be sure to see
the terms and conditions referenced by the data file header comments.

$Id$

Copyright (C) 2009  Internet Systems Consortium ("ISC")

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
"""

import gzip, csv, myrpki

class Handle(dict):
  """
  Base class for one WHOIS object being collected from the dump.

  The object is a dict mapping tag name to value.  Subclasses list the
  tags they care about in want_tags; all other tags are ignored.
  """

  # Tags this kind of object stores (and requires, see check()).
  want_tags = ()

  # When true, check() logs every object that has all its wanted tags.
  debug = True

  def set(self, tag, val):
    """
    Record val under tag if tag is one of want_tags, otherwise do
    nothing.  All space characters are removed from the stored value.
    """
    if tag in self.want_tags:
      self[tag] = val.replace(" ", "")

  def check(self):
    """
    Return True if every tag in want_tags has been set, else False.

    A complete object is also logged when debug is enabled.
    """
    if not all(tag in self for tag in self.want_tags):
      return False
    if self.debug:
      self.log()
    return True

  def __repr__(self):
    """
    Return "<ClassName tag:value ...>" covering want_tags in order,
    showing "?" for any tag that has not been set.
    """
    fields = " ".join("%s:%s" % (tag, self.get(tag, "?"))
                      for tag in self.want_tags)
    return "<%s %s>" % (self.__class__.__name__, fields)

  def log(self):
    """
    Print the repr() of this object to standard output.
    """
    # Parenthesized single argument: prints the same text whether this
    # is parsed as a Python 2 print statement or a Python 3 print() call.
    print(repr(self))

  def finish(self, ctx):
    """
    Called when the end of the object has been reached.  ctx is the
    caller's context object; this base implementation does not use it
    and just runs check().
    """
    self.check()

class as_block(Handle):
  """
  Collector for "as-block" objects.

  This one is less useful than I had hoped, no useful links to owners.
  """

  # The first tag is also the key under which main.types finds this class.
  want_tags = (
    "as-block",
    "mnt-by",
    "org",
    "mnt-lower",
  )

class as_set(Handle):
  """
  Collector for "as-set" objects.

  This is probably useless.
  """

  # The first tag is also the key under which main.types finds this class.
  want_tags = (
    "as-set",
    "mnt-by",
    "members",
  )

class aut_num(Handle):
  """
  Collector for "aut-num" objects.
  """

  # The first tag is also the key under which main.types finds this class.
  want_tags = (
    "aut-num",
    "mnt-by",
    "as-name",
  )

class inetnum(Handle):
  """
  Collector for "inetnum" objects.
  """

  # The first tag is also the key under which main.types finds this class.
  want_tags = (
    "inetnum",
    "mnt-by",
    "netname",
  )
  
class inet6num(Handle):
  """
  Collector for "inet6num" objects.
  """

  # The first tag is also the key under which main.types finds this class.
  want_tags = (
    "inet6num",
    "mnt-by",
    "netname",
  )

class main(object):
  """
  Driver for the conversion.

  All of the work is done by __init__(): it opens the two CSV output
  files, then reads "ripe.db.gz" from the current directory and hands
  each object of a known type to the matching Handle subclass.
  """

  # Map from the tag that starts an object (the first entry of each
  # class's want_tags) to the class that collects that kind of object.
  types = dict((x.want_tags[0], x) for x in (as_block, aut_num, as_set, inetnum, inet6num))

  @staticmethod
  def csvout(fn):
    """
    Open file fn for writing and return a csv.writer on it that uses
    the myrpki CSV dialect.
    """
    return csv.writer(open(fn, "w"), dialect = myrpki.csv_dialect)

  def finish_statement(self, done):
    """
    Process the "tag: value" statement accumulated in self.statement,
    if there is one, then close out the current object if done is true.

    self.cur tracks the object being collected: None between objects,
    a Handle instance for an object whose type is in self.types, or
    False for an object of a type we do not care about.

    Raises AssertionError if the statement contains no ":" separator.
    """
    if self.statement:
      tag, sep, val = self.statement.partition(":")
      assert sep, "Couldn't find separator in %r" % self.statement
      tag = tag.strip()
      val = val.strip()
      if self.cur is None:
        # First statement of a new object: its tag selects the handler,
        # or marks the whole object as one to skip.
        self.cur = self.types[tag]() if tag in self.types else False
      if self.cur is not False:
        self.cur.set(tag, val)
    if done:
      if self.cur:
        self.cur.finish(self)
      # Reset even when the object was skipped (self.cur is False);
      # otherwise the "skip" marker would stick and every later object
      # in the input would be ignored as well.
      self.cur = None

  def __init__(self):
    """
    Run the whole conversion: open the outputs, parse the input dump
    statement by statement, and close the input when finished.
    """
    self.asns = self.csvout("asns.csv")
    self.prefixes = self.csvout("prefixes.csv")
    f = gzip.open("ripe.db.gz")
    self.statement = ""
    self.cur = None
    try:
      for line in f:
        # Normalize tabs, drop anything from "#" onwards, drop the newline.
        line = line.expandtabs().partition("#")[0].rstrip("\n")
        if line and not line[0].isalpha():
          # Continuation of the current statement; a leading "+" is a
          # continuation marker and is not part of the value.
          self.statement += line[1:] if line[0] == "+" else line
        else:
          # A line starting with a letter begins a new statement; an
          # empty line ends the current object.
          self.finish_statement(not line)
          self.statement = line
    finally:
      # try/finally rather than "with" so the input is always closed
      # without depending on GzipFile context-manager support.
      f.close()
    # Flush whatever statement and object were pending at end of input.
    self.finish_statement(True)

# Instantiating main runs the whole conversion; all of the work is done
# in main.__init__().
main()