| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| """ |
| Parsers for SGML and XML to dom. |
| """ |
| |
| import sgmllib, xml.sax.handler |
| from dom import * |
| |
class Parser:
    """Build a dom tree incrementally from start/end/data style parse events.

    Instance attributes (all assigned in ``__init__``):

    * ``tree``  -- the root ``Tree`` that every parsed node hangs off.
    * ``node``  -- the currently open node; new nodes become its children.
    * ``nodes`` -- nodes added since the last ``line()`` call, i.e. nodes
      still waiting to be told their source position.
    """

    def __init__(self):
        self.tree = Tree()
        self.node = self.tree
        self.nodes = []

    def line(self, id, lineno, colno):
        """Report a source position to every node added since the last call.

        Each pending node gets ``_line(id, lineno, colno)`` called on it,
        most recently added first; the pending list is empty afterwards.
        """
        while self.nodes:
            self.nodes.pop()._line(id, lineno, colno)

    def add(self, node):
        """Attach ``node`` to the open node and queue it for ``line()``."""
        self.node.add(node)
        self.nodes.append(node)

    def start(self, name, attrs):
        """Open a new ``Tag`` called ``name`` and make it the open node.

        ``attrs`` is unpacked into the ``Tag`` constructor as positional
        arguments following the name.
        """
        tag = Tag(name, *attrs)
        self.add(tag)
        self.node = tag

    def end(self, name):
        """Close the nearest open element called ``name``.

        Elements opened after it that are still open are closed implicitly
        by ``balance``.  An end tag that matches no open element is ignored:
        balancing against it would flatten every open element and then step
        past the root of the tree.
        """
        if not self._is_open(name):
            return
        self.balance(name)
        self.node = self.node.parent

    def data(self, data):
        """Add character data to the open node.

        If the open node's last child is already a ``Data`` node the text is
        appended to it, so no new node is created (and nothing is queued for
        ``line()``); otherwise a new ``Data`` node is added.
        """
        children = self.node.children
        if children and isinstance(children[-1], Data):
            children[-1].data += data
        else:
            self.add(Data(data))

    def comment(self, comment):
        """Add a ``Comment`` node to the open node."""
        self.add(Comment(comment))

    def entity(self, ref):
        """Add an ``Entity`` node for the reference ``ref``."""
        self.add(Entity(ref))

    def character(self, ref):
        """Add a ``Character`` node for the reference ``ref``."""
        self.add(Character(ref))

    def balance(self, name=None):
        """Implicitly close open nodes until one called ``name`` is reached.

        Walking up from the open node, every node whose name differs from
        ``name`` hands its children to its parent (via ``extend``), is
        emptied, is flagged ``singleton`` and is then left.  The walk stops
        at the first node whose name equals ``name``, or at the root of the
        tree when there is none.
        """
        while self.node != self.tree and name != self.node.name:
            self.node.parent.extend(self.node.children)
            del self.node.children[:]
            self.node.singleton = True
            self.node = self.node.parent

    def _is_open(self, name):
        """Return True if the open node or one of its ancestors is ``name``."""
        node = self.node
        while node != self.tree:
            if node.name == name:
                return True
            node = node.parent
        return False
| |
| |
class SGMLParser(sgmllib.SGMLParser):
    """``sgmllib`` parser that forwards its events to a dom-building ``Parser``.

    The resulting tree is reachable through ``self.parser.tree`` once
    ``close()`` has been called.
    """

    def __init__(self, entitydefs=None):
        """Create the parser.

        ``entitydefs`` replaces the ``entitydefs`` table of the base class;
        when omitted an empty dict is installed.
        """
        sgmllib.SGMLParser.__init__(self)
        # Identity test: None is a singleton, and a caller-supplied mapping
        # is free to define its own notion of equality.
        if entitydefs is None:
            self.entitydefs = {}
        else:
            self.entitydefs = entitydefs
        self.parser = Parser()

    def unknown_starttag(self, name, attrs):
        """Forward a start tag and its attributes to the tree builder."""
        self.parser.start(name, attrs)

    def handle_data(self, data):
        """Forward character data to the tree builder."""
        self.parser.data(data)

    def handle_comment(self, comment):
        """Forward a comment to the tree builder."""
        self.parser.comment(comment)

    def unknown_entityref(self, ref):
        """Record an entity reference the base class did not resolve."""
        self.parser.entity(ref)

    def unknown_charref(self, ref):
        """Record a character reference the base class did not resolve."""
        self.parser.character(ref)

    def unknown_endtag(self, name):
        """Forward an end tag to the tree builder."""
        self.parser.end(name)

    def close(self):
        """Finish parsing and implicitly close every element still open."""
        sgmllib.SGMLParser.close(self)
        # balance() with no name walks all the way up to the root.
        self.parser.balance()
        assert self.parser.node == self.parser.tree, \
            "open node is not the tree root after balancing"
| |
class XMLParser(xml.sax.handler.ContentHandler):
    """SAX content handler that forwards events to a dom-building ``Parser``.

    After each forwarded event, the position reported by the document
    locator (when one has been supplied) is passed on to the nodes that
    event created.
    """

    def __init__(self):
        # Let the base handler set up its own state before adding ours.
        xml.sax.handler.ContentHandler.__init__(self)
        self.parser = Parser()
        self.locator = None

    def line(self):
        """Pass the locator's current position to the pending nodes.

        Does nothing when no locator has been set.
        """
        locator = self.locator
        if locator is not None:
            self.parser.line(locator.getSystemId(),
                             locator.getLineNumber(),
                             locator.getColumnNumber())

    def setDocumentLocator(self, locator):
        """Remember the locator so later events can be given positions."""
        self.locator = locator

    def startElement(self, name, attrs):
        """Open element ``name`` with the (name, value) pairs of ``attrs``."""
        self.parser.start(name, attrs.items())
        self.line()

    def endElement(self, name):
        """Close element ``name``."""
        self.parser.end(name)
        self.line()

    def characters(self, content):
        """Forward character data."""
        self.parser.data(content)
        self.line()

    def skippedEntity(self, name):
        """Record an entity the XML reader skipped as an entity node."""
        self.parser.entity(name)
        self.line()
| |