Package pyxb :: Package utils :: Module saxdom
[hide private]
[frames] | no frames]

Source Code for Module pyxb.utils.saxdom

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2009-2013, Peter A. Bigot 
  3  # 
  4  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  5  # not use this file except in compliance with the License. You may obtain a 
  6  # copy of the License at: 
  7  # 
  8  #            http://www.apache.org/licenses/LICENSE-2.0 
  9  # 
 10  # Unless required by applicable law or agreed to in writing, software 
 11  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 12  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 13  # License for the specific language governing permissions and limitations 
 14  # under the License. 
 15   
 16  """This module contains support for a DOM tree representation from an XML 
 17  document using a SAX parser. 
 18   
 19  This functionality exists because we need a DOM interface to generate the 
 20  binding classes, but the Python C{xml.dom.minidom} package does not support
 21  location information.  The SAX interface does, so we have a SAX content 
 22  handler which converts the SAX events into a DOM tree. 
 23   
 24  This is not a general-purpose DOM capability; only a small subset of the DOM 
 25  interface is supported, and only for storing the XML information, not for 
 26  converting it back into document format. 
 27  """ 
 28   
 29  from __future__ import print_function 
 30  import logging 
 31  import io 
 32  import xml.dom 
 33  import pyxb.utils.saxutils 
 34  from pyxb.utils import six 
 35  import pyxb.namespace 
 36   
 37  _log = logging.getLogger(__name__) 
 38   
def _DumpDOM (n, depth=0):
    """Utility function to print a DOM tree.

    This is a debugging aid for trees built from the node classes in this
    module: it relies on their private C{_indexInParent} and
    C{_expandedName} accessors.

    @param n: The node at which printing starts.
    @param depth: Nesting level of C{n}; used to build the prefix that
    starts each printed line.
    """

    pfx = ' ' * depth
    if (xml.dom.Node.ELEMENT_NODE == n.nodeType):
        print('%sElement[%d] %s %s with %d children' % (pfx, n._indexInParent(), n, n._expandedName, len(n.childNodes)))
        ins = pyxb.namespace.NamespaceContext.GetNodeContext(n).inScopeNamespaces()
        print('%s%s' % (pfx, ' ; '.join([ '%s=%s' % (_k, _v.uri()) for (_k, _v) in ins.items()])))
        # The attribute container is this module's NamedNodeMap, which is not
        # a mapping: it only offers length/item, so walk it by index.
        attrs = n.attributes
        for i in range(attrs.length):
            attr = attrs.item(i)
            print('%s %s=%s' % (pfx, attr._expandedName, attr.value))
        for cn in n.childNodes:
            _DumpDOM(cn, depth+1)
    elif (xml.dom.Node.TEXT_NODE == n.nodeType):
        # Text nodes are deliberately not printed.
        pass
    elif (xml.dom.Node.DOCUMENT_NODE == n.nodeType):
        print('Document node')
        # An empty document has no first child; do not recurse into None.
        root = n.firstChild
        if root is not None:
            _DumpDOM(root, depth)
    else:
        print('UNRECOGNIZED %s' % (n.nodeType,))
59
class _DOMSAXHandler (pyxb.utils.saxutils.BaseSAXHandler):
    """Content handler that assembles a DOM tree out of SAX events."""

    # Document node of the tree; assigned by startDocument.
    __document = None

    def document (self):
        """Return the document node that roots the generated tree."""
        return self.__document

    def startDocument (self):
        """Run the base handler's start processing, then create the
        L{Document} node using the handler's namespace context."""
        super(_DOMSAXHandler, self).startDocument()
        self.__document = Document(namespace_context=self.namespaceContext())

    def endDocument (self):
        """Attach the first item of the top-level content, if there is
        any, to the document node."""
        top_level = self.elementState().content()
        if len(top_level) > 0:
            assert top_level[0].maybe_element
            self.__document.appendChild(top_level[0].item)

    def startElementNS (self, name, qname, attrs):
        """Record the attributes of the element being opened.

        One L{Attr} node is built per attribute name reported by C{attrs}
        and collected in a L{NamedNodeMap} stored on the element state,
        where L{endElementNS} retrieves it.
        """
        (this_state, _parent_state, ns_ctx, _name_en) = super(_DOMSAXHandler, self).startElementNS(name, qname, attrs)
        attr_map = NamedNodeMap()
        this_state.__attributes = attr_map
        for attr_name in attrs.getNames():
            attr_map._addItem(Attr(expanded_name=pyxb.namespace.ExpandedName(attr_name),
                                   namespace_context=ns_ctx,
                                   value=attrs.getValue(attr_name),
                                   location=this_state.location()))

    def endElementNS (self, name, qname):
        """Build the L{Element} for the element being closed and hand it
        to the parent state.

        Content items that are already L{Node} instances are appended
        directly; anything else is wrapped in a L{Text} node first.
        """
        this_state = super(_DOMSAXHandler, self).endElementNS(name, qname)
        ns_ctx = this_state.namespaceContext()
        element = Element(namespace_context=ns_ctx,
                          expanded_name=this_state.expandedName(),
                          attributes=this_state.__attributes,
                          location=this_state.location())
        for info in this_state.content():
            child = info.item
            if not isinstance(child, Node):
                child = Text(child, namespace_context=ns_ctx)
            element.appendChild(child)
        this_state.parentState().addElementContent(this_state.location(), element, None)
98
def parse (stream, **kw):
    """Build a DOM tree from the XML document read from a stream.

    Keywords not described here are passed to
    L{pyxb.utils.saxutils.make_parser}.

    @param stream: An object presenting the standard file C{read} interface
    from which the document can be read.  The content should be data, not
    text.

    @keyword content_handler_constructor: Any value supplied by the caller
    is replaced with L{_DOMSAXHandler}.

    @rtype: C{xml.dom.Document}
    """

    kw.update(content_handler_constructor=_DOMSAXHandler)
    sax_parser = pyxb.utils.saxutils.make_parser(**kw)
    # Fetch the handler before parsing; it holds the tree once parsing ends.
    dom_builder = sax_parser.getContentHandler()
    sax_parser.parse(stream)
    return dom_builder.document()
119
def parseString (xml_text, **kw):
    """Build a DOM tree from an XML document held in a string.

    Keywords are passed through to L{parse}.

    @param xml_text: the XML content to be parsed, in a text representation.
    Text is encoded with C{pyxb._InputEncoding} before parsing; any other
    value is handed on unchanged."""
    # The SAX parser consumes data rather than text.
    if isinstance(xml_text, six.text_type):
        payload = xml_text.encode(pyxb._InputEncoding)
    else:
        payload = xml_text
    return parse(io.BytesIO(payload), **kw)
130
class Node (xml.dom.Node, pyxb.utils.utility.Locatable_mixin):
    """Base for the minimal DOM interface required by PyXB."""

    def __init__ (self, node_type, **kw):
        """Create a node.

        @param node_type: The C{xml.dom.Node} node type constant for this
        node.
        @keyword namespace_context: (required) The namespace context to
        associate with the node; the node is registered with it through
        C{setNodeContext}.
        @keyword location: Optional location; when provided it is passed to
        the C{Locatable_mixin} initializer.
        @keyword value: Optional value stored for the node.
        @keyword attributes: Optional attribute container for the node.
        @keyword expanded_name: Optional expanded name from which the prefix,
        namespace URI, local name and tag name are derived.
        """
        location = kw.pop('location', None)
        if location is not None:
            pyxb.utils.utility.Locatable_mixin.__init__(self, location=location)
        self.__nodeType = node_type
        self.__parentNode = None
        self.__indexInParent = None
        self.__childNodes = []
        self.__namespaceContext = kw['namespace_context']
        self.__value = kw.get('value')
        self.__attributes = kw.get('attributes')
        en = kw.get('expanded_name')
        if en is not None:
            self.__expandedName = en
            ns = en.namespace()
            if (ns is not None) and not (ns.prefix() is None):
                assert not ns.isAbsentNamespace()
                self.__prefix = ns.prefix()
            if (ns is not None):
                self.__namespaceURI = ns.uri()
            self.__localName = en.localName()
        # NOTE(review): the tag name is computed for every node, including
        # ones created without an expanded name -- confirm that this matches
        # the intended nesting.
        if self.__prefix:
            self.__tagName = '%s:%s' % (self.__prefix, self.__localName)
        else:
            self.__tagName = self.__localName
        self.__namespaceContext.setNodeContext(self)

    location = property(lambda _s: _s._location())

    # Class-level defaults for values that __init__ only sets conditionally.
    __expandedName = None
    _expandedName = property(lambda _s: _s.__expandedName)
    __prefix = ''
    prefix = property(lambda _s: _s.__prefix)
    __tagName = ''
    tagName = property(lambda _s: _s.__tagName)
    __namespaceURI = None
    namespaceURI = property(lambda _s: _s.__namespaceURI)
    __localName = None
    localName = property(lambda _s: _s.__localName)
    __value = None
    value = property(lambda _s: _s.__value)

    def _indexInParent (self):
        """Return the position of this node among its parent's children,
        or C{None} if it has not been appended to a parent."""
        return self.__indexInParent

    def __childIfPresent (self, index):
        """Return the child at C{index}, or C{None} if C{index} is past
        the end of the child list."""
        if index < len(self.__childNodes):
            return self.__childNodes[index]
        return None

    def appendChild (self, new_child):
        """Add C{new_child} as the last child of this node, recording this
        node and the child's position on the child."""
        new_child._setParentNode(self, len(self.__childNodes))
        self.__childNodes.append(new_child)

    def _setParentNode (self, parent_node, index_in_parent):
        """Record the parent of this node and this node's index within the
        parent's children."""
        self.__parentNode = parent_node
        self.__indexInParent = index_in_parent

    def _setAttributes (self, attributes):
        """Assign the attribute container; permitted only while none has
        been set."""
        assert self.__attributes is None
        self.__attributes = attributes
    __attributes = None

    nodeType = property(lambda _s: _s.__nodeType)
    parentNode = property(lambda _s: _s.__parentNode)
    firstChild = property(lambda _s: _s.__childIfPresent(0))
    childNodes = property(lambda _s: _s.__childNodes)
    attributes = property(lambda _s: _s.__attributes)

    def __nextSibling (self):
        """Return the node following this one in its parent, or C{None}
        when there is none."""
        parent = self.parentNode
        # A node that was never appended to a parent has no siblings.
        if parent is None:
            return None
        return parent.__childIfPresent(self.__indexInParent+1)
    nextSibling = property(__nextSibling)

    def hasAttributeNS (self, ns_uri, local_name):
        """Return C{True} iff the node has an attribute with the given
        namespace URI and local name."""
        return self.getAttributeNodeNS(ns_uri, local_name) is not None

    def getAttributeNodeNS (self, ns_uri, local_name):
        """Return the attribute node with the given namespace URI and local
        name, or C{None} if the attribute container has no such entry."""
        return self.__attributes._getAttr(pyxb.namespace.ExpandedName(ns_uri, local_name))

    def getAttributeNS (self, ns_uri, local_name):
        """Return the value of the attribute with the given namespace URI
        and local name, or an empty string if there is no such attribute."""
        rv = self.getAttributeNodeNS(ns_uri, local_name)
        if rv is None:
            return ''
        return rv.value
214
class Document (Node):
    """Document node; supplies the C{documentElement} interface."""

    # The document element is whatever node is the document's first child.
    documentElement = Node.firstChild

    def __init__ (self, **kw):
        """Create a node of type C{DOCUMENT_NODE}; keywords are passed on
        to L{Node}."""
        super(Document, self).__init__(node_type=xml.dom.Node.DOCUMENT_NODE, **kw)
221
class Attr (Node):
    """Attribute node; supplies the C{name}, C{nodeName} and C{nodeValue}
    interface."""

    # An attribute's name is exposed under both spellings and is its tag
    # name; its node value is the stored value.
    nodeName = Node.tagName
    name = Node.tagName
    nodeValue = Node.value

    def __init__ (self, **kw):
        """Create a node of type C{ATTRIBUTE_NODE}; keywords are passed on
        to L{Node}."""
        super(Attr, self).__init__(node_type=xml.dom.Node.ATTRIBUTE_NODE, **kw)
229
class NamedNodeMap (object):
    """The subset of the DOM NamedNodeMap interface that PyXB needs."""

    # Attribute nodes in insertion order.
    __members = None
    # Attribute nodes keyed by expanded name.
    __memberMap = None

    def __init__ (self):
        super(NamedNodeMap, self).__init__()
        self.__members = []
        self.__memberMap = {}

    @property
    def length (self):
        """The number of nodes held by the map."""
        return len(self.__members)

    def item (self, index):
        """Return the node stored at position C{index}."""
        return self.__members[index]

    def _addItem (self, attr):
        """Append C{attr} to the map.

        The node must already have a namespace context.  It is also made
        available by expanded name when it has one.
        """
        assert pyxb.namespace.NamespaceContext.GetNodeContext(attr) is not None
        self.__members.append(attr)
        expanded_name = attr._expandedName
        if expanded_name is None:
            return
        self.__memberMap[expanded_name] = attr

    def _getAttr (self, name):
        """Return the node registered under expanded name C{name}, or
        C{None} if there is none."""
        return self.__memberMap.get(name)
257
class Element (Node):
    """Element node; must be created with an attribute container."""

    # Both name accessors report the local name of the element.
    nodeName = Node.localName
    tagName = Node.localName

    def __init__ (self, **kw):
        """Create a node of type C{ELEMENT_NODE}; keywords are passed on to
        L{Node}, and C{attributes} must be among them."""
        super(Element, self).__init__(node_type=xml.dom.Node.ELEMENT_NODE, **kw)
        assert self.attributes is not None
264
class _CharacterData (Node):
    """Abstract base shared by the node types that hold text data."""

    # The text is the node's value, exposed under the DOM name C{data}.
    data = Node.value
268
class Text (_CharacterData):
    """Text node."""

    def __init__ (self, text, **kw):
        """Create a node of type C{TEXT_NODE} whose value is C{text};
        remaining keywords are passed on to L{Node}."""
        super(Text, self).__init__(value=text, node_type=xml.dom.Node.TEXT_NODE, **kw)
272
class Comment (_CharacterData):
    """Comment node."""

    def __init__ (self, text, **kw):
        """Create a node of type C{COMMENT_NODE} whose value is C{text};
        remaining keywords are passed on to L{Node}."""
        super(Comment, self).__init__(value=text, node_type=xml.dom.Node.COMMENT_NODE, **kw)
if '__main__' == __name__:
    # Script entry point: parse the file named by the first command-line
    # argument, or a bundled sample document when none is given.
    import sys
    xml_file = 'examples/tmsxtvd/tmsdatadirect_sample.xml'
    if 1 < len(sys.argv):
        xml_file = sys.argv[1]

    # parse() expects data rather than text, so read the file in binary
    # mode; the with-statement closes it once parsing is done.
    with open(xml_file, 'rb') as xml_stream:
        doc = parse(xml_stream)

## Local Variables:
## fill-column:78
## End: