Package pyxb :: Package utils :: Module saxdom
[hide private]
[frames | no frames]

Source Code for Module pyxb.utils.saxdom

  1  # Copyright 2009-2012, Peter A. Bigot 
  2  # 
  3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  4  # not use this file except in compliance with the License. You may obtain a 
  5  # copy of the License at: 
  6  # 
  7  #            http://www.apache.org/licenses/LICENSE-2.0 
  8  # 
  9  # Unless required by applicable law or agreed to in writing, software 
 10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 12  # License for the specific language governing permissions and limitations 
 13  # under the License. 
 14   
 15  """This module contains support for a DOM tree representation from an XML 
 16  document using a SAX parser. 
 17   
 18  This functionality exists because we need a DOM interface to generate the 
 19  binding classes, but the Python C{xml.dom.minidom} package does not support 
 20  location information.  The SAX interface does, so we have a SAX content 
 21  handler which converts the SAX events into a DOM tree. 
 22   
 23  This is not a general-purpose DOM capability; only a small subset of the DOM 
 24  interface is supported, and only for storing the XML information, not for 
 25  converting it back into document format. 
 26  """ 
 27   
 28  import xml.dom 
 29  import saxutils 
 30  import StringIO 
 31  import pyxb.namespace 
def _DumpDOM (n, depth=0):
    """Utility function to print a DOM tree.

    Writes a description of C{n}, and recursively of its children, to
    standard output.  Intended as a debugging aid.

    @param n: The node to describe.  Its C{nodeType} selects what is printed:
    elements show their name, in-scope namespaces, attributes and children;
    text nodes are skipped; a document node shows its first child.

    @param depth: Nesting level of C{n}; the output is prefixed with one
    space per level.
    """

    pfx = ' ' * depth
    if (xml.dom.Node.ELEMENT_NODE == n.nodeType):
        # %s rather than %d for the index: a node that has not been appended
        # to a parent may have no numeric index to report.
        print('%sElement[%s] %s %s with %d children' % (pfx, n._indexInParent(), n, pyxb.namespace.ExpandedName(n.name), len(n.childNodes)))
        ins = pyxb.namespace.resolution.NamespaceContext.GetNodeContext(n).inScopeNamespaces()
        print('%s%s' % (pfx, ' ; '.join([ '%s=%s' % (_k, _v.uri()) for (_k, _v) in ins.items()])))
        for (k, v) in n.attributes.items():
            print('%s %s=%s' % (pfx, pyxb.namespace.ExpandedName(k), v))
        for cn in n.childNodes:
            _DumpDOM(cn, depth+1)
    elif (xml.dom.Node.TEXT_NODE == n.nodeType):
        # Text content is deliberately not displayed
        pass
    elif (xml.dom.Node.DOCUMENT_NODE == n.nodeType):
        print('Document node')
        # An empty document has no first child; there is nothing to recurse into
        root = n.firstChild
        if root is not None:
            _DumpDOM(root, depth)
    else:
        print('UNRECOGNIZED %s' % (n.nodeType,))
53
class _DOMSAXHandler (saxutils.BaseSAXHandler):
    """SAX content handler that assembles a DOM tree from parse events.

    Attributes are collected when an element starts; the L{Element} node
    itself is created when the element ends, and is then handed to the parent
    element's state as content.
    """

    # Root of the generated tree; assigned by startDocument
    __document = None

    def document (self):
        """Return the L{Document} that is the root of the generated tree."""
        return self.__document

    def startDocument (self):
        """Create the L{Document} node using the handler's namespace context."""
        super(_DOMSAXHandler, self).startDocument()
        self.__document = Document(namespace_context=self.namespaceContext())

    def endDocument (self):
        """Append the first item of top-level content to the document."""
        top_level = self.elementState().content()
        if 0 == len(top_level):
            return
        ( root, element_use, maybe_element ) = top_level[0]
        self.__document.appendChild(root)
        # Debugging aid: _DumpDOM(root)

    def startElementNS (self, name, qname, attrs):
        """Store the element's attributes, as L{Attr} nodes, on its state.

        @param name: Passed through to the base class handler.
        @param qname: Passed through to the base class handler.
        @param attrs: Attribute collection providing C{getNames} and
        C{getValue}.
        """
        (this_state, parent_state, ns_ctx, name_en) = super(_DOMSAXHandler, self).startElementNS(name, qname, attrs)
        attr_map = NamedNodeMap()
        this_state.__attributes = attr_map
        for attr_name in attrs.getNames():
            attr_map._addItem(Attr(expanded_name=pyxb.namespace.ExpandedName(attr_name),
                                   namespace_context=ns_ctx,
                                   value=attrs.getValue(attr_name),
                                   location=this_state.location()))

    def endElementNS (self, name, qname):
        """Build the L{Element} for the completed element and give it to the
        parent state.

        Content items that are already L{Node} instances become children
        directly; anything else is wrapped in a L{Text} node.
        """
        this_state = super(_DOMSAXHandler, self).endElementNS(name, qname)
        ns_ctx = this_state.namespaceContext()
        element = Element(namespace_context=ns_ctx,
                          expanded_name=this_state.expandedName(),
                          attributes=this_state.__attributes,
                          location=this_state.location())
        for ( content, element_use, maybe_element ) in this_state.content():
            child = content if isinstance(content, Node) else Text(content, namespace_context=ns_ctx)
            element.appendChild(child)
        this_state.parentState().addElementContent(element, None)
        # Debugging aid:
        # print '%s %s has %d children' % (element.namespaceURI, element.localName, len(element.childNodes))
def parse (stream, **kw):
    """Read an XML document from a stream and return the DOM tree that
    represents its contents.

    Keywords not described here are passed to L{saxutils.make_parser}.

    @param stream: An object presenting the standard file C{read} interface
    from which the document can be read.

    @keyword content_handler_constructor: Input is overridden to assign this a
    value of L{_DOMSAXHandler}.

    @rtype: C{xml.dom.Document}
    """

    # Whatever the caller asked for, the DOM-building handler is required here
    kw.update(content_handler_constructor=_DOMSAXHandler)
    parser = saxutils.make_parser(**kw)
    dom_handler = parser.getContentHandler()
    parser.parse(stream)
    return dom_handler.document()
114
def parseString (text, **kw):
    """Parse a string holding an XML document and return the corresponding DOM
    tree.

    @param text: The document text.  A C{unicode} value is encoded with
    C{pyxb._InputEncoding} before parsing; any other value is used as-is.

    Keywords are passed through to L{parse}.
    """
    # XML parser doesn't really like unicode strings
    source = text.encode(pyxb._InputEncoding) if isinstance(text, unicode) else text
    return parse(StringIO.StringIO(source), **kw)
122
class Node (xml.dom.Node, pyxb.utils.utility.Locatable_mixin):
    """Base for the minimal DOM interface required by PyXB.

    Holds the node type, parent/child links, the namespace context, and the
    optional name, value and attribute map supplied at construction.
    """

    def __init__ (self, node_type, **kw):
        """Create a node that is not yet attached to any parent.

        @param node_type: One of the C{xml.dom.Node} node type constants.

        @keyword namespace_context: (required) The namespace context for the
        node; the node is registered with it via C{setNodeContext}.

        @keyword location: Optional location passed to
        C{Locatable_mixin.__init__}.

        @keyword value: Optional value of the node.

        @keyword attributes: Optional attribute map for the node.

        @keyword expanded_name: Optional expanded name from which the node's
        name tuple, namespace URI and local name are taken.
        """
        location = kw.pop('location', None)
        if location is not None:
            pyxb.utils.utility.Locatable_mixin.__init__(self, location=location)
        self.__nodeType = node_type
        self.__parentNode = None
        # No position among siblings until the node is appended to a parent.
        # Initialized so _indexInParent() is usable on unattached nodes.
        self.__indexInParent = None
        self.__childNodes = []
        self.__namespaceContext = kw['namespace_context']
        self.__value = kw.get('value')
        self.__attributes = kw.get('attributes')
        expanded_name = kw.get('expanded_name')
        if expanded_name is not None:
            self.__name = expanded_name.uriTuple()
            self.__namespaceURI = expanded_name.namespaceURI()
            self.__localName = expanded_name.localName()
        self.__namespaceContext.setNodeContext(self)

    location = property(lambda _s: _s._location())

    __name = None
    @property
    def name (self):
        """The C{uriTuple()} of the node's expanded name, or C{None}."""
        return self.__name

    @property
    def expanded_name (self):
        """The node's name as a C{pyxb.namespace.ExpandedName}."""
        # NOTE(review): the body of this property was not visible in the
        # listing this file was recovered from; reconstructed from how the
        # name tuple is converted elsewhere in this module -- confirm against
        # the upstream source.
        return pyxb.namespace.ExpandedName(self.__name)

    __namespaceURI = None
    namespaceURI = property(lambda _s: _s.__namespaceURI)
    __localName = None
    localName = property(lambda _s: _s.__localName)
    __value = None
    value = property(lambda _s: _s.__value)

    def _indexInParent (self):
        """Return the node's position in its parent's child list, or C{None}
        if the node has not been appended to a parent."""
        return self.__indexInParent

    def __childIfPresent (self, index):
        """Return the child at C{index}, or C{None} if C{index} is at or past
        the end of the child list."""
        if index < len(self.__childNodes):
            return self.__childNodes[index]
        return None

    def appendChild (self, new_child):
        """Add C{new_child} as the last child of this node, recording this
        node and the child's position on the child."""
        new_child._setParentNode(self, len(self.__childNodes))
        self.__childNodes.append(new_child)

    def _setParentNode (self, parent_node, index_in_parent):
        """Record the parent of this node and this node's index within it."""
        self.__parentNode = parent_node
        self.__indexInParent = index_in_parent

    def _setAttributes (self, attributes):
        """Assign the attribute map; only permitted when none is set yet."""
        assert self.__attributes is None
        self.__attributes = attributes
    __attributes = None

    nodeType = property(lambda _s: _s.__nodeType)
    parentNode = property(lambda _s: _s.__parentNode)
    firstChild = property(lambda _s: _s.__childIfPresent(0))
    childNodes = property(lambda _s: _s.__childNodes)
    attributes = property(lambda _s: _s.__attributes)

    @property
    def nextSibling (self):
        """The node following this one in its parent's child list, or C{None}
        if this is the last child or the node has no parent."""
        parent = self.__parentNode
        if parent is None:
            # An unattached node has no siblings
            return None
        return parent.__childIfPresent(self.__indexInParent + 1)

    def hasAttributeNS (self, ns_uri, local_name):
        """Return C{True} iff the node has an attribute with the given
        namespace URI and local name."""
        return self.getAttributeNodeNS(ns_uri, local_name) is not None

    def getAttributeNodeNS (self, ns_uri, local_name):
        """Return the attribute node with the given namespace URI and local
        name, or C{None} if there is no such attribute."""
        if self.__attributes is None:
            # Nodes created without an attribute map have no attributes
            return None
        return self.__attributes._getAttr( (ns_uri, local_name) )

    def getAttributeNS (self, ns_uri, local_name):
        """Return the value of the attribute with the given namespace URI and
        local name, or the empty string if there is no such attribute."""
        rv = self.getAttributeNodeNS(ns_uri, local_name)
        if rv is None:
            return ''
        return rv.value
197
class Document (Node):
    """Document node; adds the C{documentElement} interface to L{Node}."""

    def __init__ (self, **kw):
        """Create a node of type C{DOCUMENT_NODE}.

        All keywords are passed to L{Node.__init__}.
        """
        super(Document, self).__init__(
            node_type=xml.dom.Node.DOCUMENT_NODE,
            **kw)

    # The document element is exposed as the document's first child
    documentElement = Node.firstChild
204
class Attr (Node):
    """Attribute node; adds the C{nodeName} and C{nodeValue} interface to
    L{Node}."""

    def __init__ (self, **kw):
        """Create a node of type C{ATTRIBUTE_NODE}.

        All keywords are passed to L{Node.__init__}.
        """
        super(Attr, self).__init__(
            node_type=xml.dom.Node.ATTRIBUTE_NODE,
            **kw)

    # DOM-style aliases for the inherited name and value properties
    nodeName = Node.name
    nodeValue = Node.value
211
class NamedNodeMap (dict):
    """The portion of the DOM NamedNodeMap interface that PyXB needs.

    As a dictionary it maps each attribute's name to its value; the attribute
    nodes themselves are kept, in the order added, in a separate list.
    """

    # Attribute nodes in the order added; replaced per instance by __init__
    __members = None

    def __init__ (self):
        """Create an empty map."""
        super(NamedNodeMap, self).__init__()
        self.__members = []

    @property
    def length (self):
        """The number of attribute nodes that have been added."""
        return len(self.__members)

    def item (self, index):
        """Return the attribute node at position C{index}."""
        return self.__members[index]

    def _addItem (self, attr):
        """Add an attribute node, recording its value under its name.

        @param attr: The attribute node; it must already have a namespace
        context associated with it.
        """
        self[attr.name] = attr.value
        assert pyxb.namespace.resolution.NamespaceContext.GetNodeContext(attr) is not None
        self.__members.append(attr)

    def _getAttr (self, name):
        """Return the first attribute node whose name equals C{name}, or
        C{None} if there is none."""
        for candidate in self.__members:
            if candidate.name == name:
                break
        else:
            candidate = None
        return candidate
235
class Element (Node):
    """Element node; adds the C{tagName} and C{nodeName} interface to
    L{Node}."""

    def __init__ (self, **kw):
        """Create a node of type C{ELEMENT_NODE}.

        All keywords are passed to L{Node.__init__}; an attribute map must
        be among them.
        """
        super(Element, self).__init__(
            node_type=xml.dom.Node.ELEMENT_NODE,
            **kw)
        assert self.attributes is not None

    # Both names are aliases for the element's local name
    tagName = Node.localName
    nodeName = Node.localName
242
class _CharacterData (Node):
    """Abstract base for node types that hold text data."""

    # The text is stored as the node's value; expose it under the DOM name
    data = Node.value
246
class Text (_CharacterData):
    """Text node holding character content."""

    def __init__ (self, text, **kw):
        """Create a node of type C{TEXT_NODE}.

        @param text: The content, stored as the node's value.

        Remaining keywords are passed to L{Node.__init__}.
        """
        super(Text, self).__init__(
            value=text,
            node_type=xml.dom.Node.TEXT_NODE,
            **kw)
250
class Comment (_CharacterData):
    """Comment node holding the text of an XML comment."""

    def __init__ (self, text, **kw):
        """Create a node of type C{COMMENT_NODE}.

        @param text: The comment content, stored as the node's value.

        Remaining keywords are passed to L{Node.__init__}.
        """
        # super() must name this class.  Comment does not derive from Text,
        # so super(Text, self) raises TypeError when a Comment is created.
        super(Comment, self).__init__(value=text, node_type=xml.dom.Node.COMMENT_NODE, **kw)
if '__main__' == __name__:
    # Ad-hoc driver: parse the file named on the command line, or a sample
    # document if none is given.
    import sys
    xml_file = 'examples/tmsxtvd/tmsdatadirect_sample.xml'
    if 1 < len(sys.argv):
        xml_file = sys.argv[1]

    # Make sure the input is closed even if parsing fails
    xml_stream = open(xml_file)
    try:
        doc = parse(xml_stream)
    finally:
        xml_stream.close()

## Local Variables:
## fill-column:78
## End: