diff options
Diffstat (limited to 'scripts/rpki/sax_utils.py')
-rw-r--r-- | scripts/rpki/sax_utils.py | 30 |
1 files changed, 28 insertions, 2 deletions
diff --git a/scripts/rpki/sax_utils.py b/scripts/rpki/sax_utils.py index 1333df1a..acb7a5a9 100644 --- a/scripts/rpki/sax_utils.py +++ b/scripts/rpki/sax_utils.py @@ -3,8 +3,17 @@ import xml.sax class handler(xml.sax.handler.ContentHandler): - """ - SAX handler for RPKI protocols. + """SAX handler for RPKI protocols. + + This class provides some basic amenities for parsing protocol XML of + the kind we use in the RPKI protocols, including whacking all the + protocol element text into US-ASCII, simplifying accumulation of + text fields, and hiding some of the fun relating to XML namespaces. + + General assumption: by the time this parsing code gets invoked, the + XML has already passed RelaxNG validation, so we only have to check + for errors that the schema can't catch, and we don't have to play as + many XML namespace games. """ def __init__(self): @@ -12,15 +21,28 @@ class handler(xml.sax.handler.ContentHandler): self.stack = [] def startElementNS(self, name, qname, attrs): + """Redirect startElementNS() events to startElement().""" return self.startElement(name[1], attrs) def endElementNS(self, name, qname): + """Redirect endElementNS() events to endElement().""" return self.endElement(name[1]) def characters(self, content): + """Accumulate a chunk of element content (text).""" self.text += content def startElement(self, name, attrs): + """Handle startElement() events. + + We maintain a stack of nested elements under construction so that + we can feed events directly to the current element rather than + having to pass them through all the nesting elements. + + If the stack is empty, this event is for the outermost element, so + we call a virtual method to create the corresponding object and + that's the object we'll be returning as our final result. 
+ """ a = dict() for k,v in attrs.items(): if isinstance(k, tuple): @@ -37,6 +59,10 @@ class handler(xml.sax.handler.ContentHandler): self.stack[-1].startElement(self.stack, name, a) def endElement(self, name): + """Handle endElement() events. + + Mostly this means handling any accumulated element text. + """ text = self.text.encode("ascii").strip() self.text = "" self.stack[-1].endElement(self.stack, name, text) |