Package pyxb :: Package utils :: Module saxdom
[hide private]
[frames | no frames]

Source Code for Module pyxb.utils.saxdom

  1  # Copyright 2009, Peter A. Bigot 
  2  # 
  3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  4  # not use this file except in compliance with the License. You may obtain a 
  5  # copy of the License at: 
  6  # 
  7  #            http://www.apache.org/licenses/LICENSE-2.0 
  8  # 
  9  # Unless required by applicable law or agreed to in writing, software 
 10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 12  # License for the specific language governing permissions and limitations 
 13  # under the License. 
 14   
 15  """This module contains support for a DOM tree representation from an XML 
 16  document using a SAX parser. 
 17   
 18  This functionality exists because we need a DOM interface to generate the 
 19  binding classes, but the Python C{xml.dom.minidom} package does not support 
 20  location information.  The SAX interface does, so we have a SAX content 
 21  handler which converts the SAX events into a DOM tree. 
 22   
 23  This is not a general-purpose DOM capability; only a small subset of the DOM 
 24  interface is supported, and only for storing the XML information, not for 
 25  converting it back into document format. 
 26  """ 
 27   
 28  import xml.dom 
 29  import saxutils 
 30  import StringIO 
 31  import pyxb.namespace 
def _DumpDOM (n, depth=0):
    """Print an outline of the DOM tree rooted at C{n} (debugging aid).

    Elements are reported with their index in the parent, expanded name,
    child count, in-scope namespaces and attributes, followed by their
    children one level deeper.  Text nodes produce no output.

    @param n: The node at which the dump starts.
    @param depth: Nesting level; controls the width of the line prefix.
    """
    # print(...) with one parenthesized expression behaves exactly like the
    # print statement, so the output is unchanged.
    indent = ' ' * depth
    node_type = n.nodeType

    if xml.dom.Node.TEXT_NODE == node_type:
        # Text content is deliberately left out of the dump.
        return

    if xml.dom.Node.DOCUMENT_NODE == node_type:
        print('Document node')
        _DumpDOM(n.firstChild, depth)
        return

    if xml.dom.Node.ELEMENT_NODE != node_type:
        print('UNRECOGNIZED %s' % (node_type,))
        return

    # Element: header line first ...
    header = (indent, n._indexInParent(), n, pyxb.namespace.ExpandedName(n.name), len(n.childNodes))
    print('%sElement[%d] %s %s with %d children' % header)

    # ... then the namespaces in scope at this element ...
    in_scope = pyxb.namespace.resolution.NamespaceContext.GetNodeContext(n).inScopeNamespaces()
    bindings = []
    for (prefix, namespace) in in_scope.items():
        bindings.append('%s=%s' % (prefix, namespace.uri()))
    print('%s%s' % (indent, ' ; '.join(bindings)))

    # ... then its attributes, and finally the children one level deeper.
    for (attr_name, attr_value) in n.attributes.items():
        print('%s %s=%s' % (indent, pyxb.namespace.ExpandedName(attr_name), attr_value))
    for child in n.childNodes:
        _DumpDOM(child, depth + 1)
53
class _DOMSAXHandler (saxutils.BaseSAXHandler):
    """Content handler that assembles a DOM tree out of SAX events.

    Attributes are collected when an element starts; the L{Element} itself
    is created when the element ends, once all of its content is known, and
    is then recorded as content of the parent element state.
    """

    # Root of the tree being built; replaced by a new Document in
    # startDocument.
    __document = None

    def document (self):
        """Return the document that is the root of the generated tree."""
        return self.__document

    def startDocument (self):
        """Run the base-class start processing, then create the L{Document}
        using the handler's current namespace context."""
        super(_DOMSAXHandler, self).startDocument()
        root_context = self.namespaceContext()
        self.__document = Document(namespace_context=root_context)

    def endDocument (self):
        """Attach the first top-level content item, if any, to the document."""
        top_level = self.elementState().content()
        if len(top_level) > 0:
            # Content entries are triples; only the node itself is used here.
            (root_element, _element_use, _maybe_element) = top_level[0]
            self.__document.appendChild(root_element)

    def startElementNS (self, name, qname, attrs):
        """Record the attributes of the element that is starting.

        The attribute nodes are stored on the element state returned by the
        base class so that L{endElementNS} can hand them to the L{Element}.
        """
        base_result = super(_DOMSAXHandler, self).startElementNS(name, qname, attrs)
        (this_state, _parent_state, ns_ctx, _name_en) = base_result
        attribute_map = NamedNodeMap()
        this_state.__attributes = attribute_map
        for attr_name in attrs.getNames():
            attr_node = Attr(expanded_name=pyxb.namespace.ExpandedName(attr_name),
                             namespace_context=ns_ctx,
                             value=attrs.getValue(attr_name),
                             location=this_state.location())
            attribute_map._addItem(attr_node)

    def endElementNS (self, name, qname):
        """Create the L{Element} for the element that just ended.

        Content items that are already nodes become children directly;
        anything else is wrapped in a L{Text} node.  The finished element is
        added to the content of the parent element state.
        """
        finished = super(_DOMSAXHandler, self).endElementNS(name, qname)
        ns_ctx = finished.namespaceContext()
        element = Element(namespace_context=ns_ctx,
                          expanded_name=finished.expandedName(),
                          attributes=finished.__attributes,
                          location=finished.location())
        for entry in finished.content():
            (item, _element_use, _maybe_element) = entry
            if not isinstance(item, Node):
                item = Text(item, namespace_context=ns_ctx)
            element.appendChild(item)
        finished.parentState().addElementContent(element, None)
def parse (stream, **kw):
    """Read an XML document from a stream and return its DOM tree.

    Any keyword that is not described below is forwarded unchanged to
    L{saxutils.make_parser}.

    @param stream: An object presenting the standard file C{read} interface
    from which the document can be read.

    @keyword content_handler_constructor: Whatever the caller supplies is
    replaced by L{_DOMSAXHandler}.

    @rtype: C{xml.dom.Document}
    """
    parser_args = dict(kw)
    parser_args['content_handler_constructor'] = _DOMSAXHandler
    sax_parser = saxutils.make_parser(**parser_args)
    # Fetch the handler before parsing; it holds the document afterwards.
    dom_builder = sax_parser.getContentHandler()
    sax_parser.parse(stream)
    return dom_builder.document()
114
def parseString (text, **kw):
    """Build the DOM tree for an XML document held in a string.

    The text is wrapped in a C{StringIO} stream and handed to L{parse};
    keywords are forwarded to L{parse} unchanged.
    """
    text_stream = StringIO.StringIO(text)
    return parse(text_stream, **kw)
120
class Node (xml.dom.Node, pyxb.utils.utility.Locatable_mixin):
    """Base for the minimal DOM interface required by PyXB.

    A node records its type, parent, position within the parent, children,
    namespace context and, where supplied, a value, an attribute map and the
    components of its expanded name.
    """

    # Class-level defaults so the read-only properties below return None on
    # nodes for which the corresponding keyword was never supplied.
    __name = None
    __namespaceURI = None
    __localName = None
    __value = None
    __attributes = None
    # None until the node is appended to a parent.
    __indexInParent = None

    def __init__ (self, node_type, **kw):
        """Create a node.

        @param node_type: One of the C{xml.dom.Node} node type constants.
        @keyword namespace_context: Required.  The namespace context in
        effect for the node; the node is passed to its C{setNodeContext}.
        @keyword location: Optional.  When not C{None} it is forwarded to
        the C{Locatable_mixin} constructor.
        @keyword value: Optional value of the node.
        @keyword attributes: Optional attribute map of the node.
        @keyword expanded_name: Optional expanded name; supplies C{name}
        (its C{uriTuple()}), C{namespaceURI} and C{localName}.
        @raise KeyError: if C{namespace_context} is not provided.
        """
        location = kw.pop('location', None)
        if location is not None:
            pyxb.utils.utility.Locatable_mixin.__init__(self, location=location)
        self.__nodeType = node_type
        self.__parentNode = None
        self.__indexInParent = None
        self.__childNodes = []
        self.__namespaceContext = kw['namespace_context']
        self.__value = kw.get('value')
        self.__attributes = kw.get('attributes')
        expanded_name = kw.get('expanded_name')
        if expanded_name is not None:
            self.__name = expanded_name.uriTuple()
            self.__namespaceURI = expanded_name.namespaceURI()
            self.__localName = expanded_name.localName()
        self.__namespaceContext.setNodeContext(self)

    # Delegates to _location(), presumably supplied by Locatable_mixin.
    location = property(lambda _s: _s._location())

    @property
    def name (self):
        """The C{uriTuple()} of the expanded name given at construction, or
        C{None} if there was none."""
        return self.__name

    @property
    def expanded_name (self):
        """The node name as a C{pyxb.namespace.ExpandedName}."""
        # NOTE(review): the body of this property was missing from the
        # listing under review.  It is reconstructed here by wrapping the
        # stored name tuple, the same way _DumpDOM wraps a node's name;
        # confirm against the upstream source.
        return pyxb.namespace.ExpandedName(self.__name)

    namespaceURI = property(lambda _s: _s.__namespaceURI)
    localName = property(lambda _s: _s.__localName)
    value = property(lambda _s: _s.__value)

    def _indexInParent (self):
        """Return the position of this node among its parent's children, or
        C{None} if the node has not been appended to a parent."""
        return self.__indexInParent

    def __childIfPresent (self, index):
        """Return the child at C{index}, or C{None} if C{index} is past the
        last child."""
        if index < len(self.__childNodes):
            return self.__childNodes[index]
        return None

    def appendChild (self, new_child):
        """Add C{new_child} as the last child of this node."""
        # Tell the child its position before the list grows, so the index is
        # the slot the child is about to occupy.
        new_child._setParentNode(self, len(self.__childNodes))
        self.__childNodes.append(new_child)

    def _setParentNode (self, parent_node, index_in_parent):
        """Record the parent of this node and this node's index within it."""
        self.__parentNode = parent_node
        self.__indexInParent = index_in_parent

    def _setAttributes (self, attributes):
        """Assign the attribute map; only legal while none has been set."""
        assert self.__attributes is None
        self.__attributes = attributes

    nodeType = property(lambda _s: _s.__nodeType)
    parentNode = property(lambda _s: _s.__parentNode)
    firstChild = property(lambda _s: _s.__childIfPresent(0))
    childNodes = property(lambda _s: _s.__childNodes)
    attributes = property(lambda _s: _s.__attributes)

    @property
    def nextSibling (self):
        """The node following this one in its parent, or C{None} when this
        is the last child or the node has no parent."""
        parent = self.__parentNode
        if parent is None:
            # A parentless node has no siblings; without this guard the
            # lookup below would fail on None.
            return None
        return parent.__childIfPresent(self.__indexInParent + 1)

    def hasAttributeNS (self, ns_uri, local_name):
        """Return C{True} iff the node has an attribute with the given
        namespace URI and local name."""
        return self.getAttributeNodeNS(ns_uri, local_name) is not None

    def getAttributeNodeNS (self, ns_uri, local_name):
        """Return the attribute node with the given namespace URI and local
        name, or C{None} if the attribute map has no such entry."""
        return self.__attributes._getAttr( (ns_uri, local_name) )

    def getAttributeNS (self, ns_uri, local_name):
        """Return the value of the named attribute, or the empty string if
        the attribute is not present."""
        rv = self.getAttributeNodeNS(ns_uri, local_name)
        if rv is None:
            return ''
        return rv.value
195
class Document (Node):
    """Document node; adds the C{documentElement} interface to L{Node}."""

    def __init__ (self, **kw):
        """Create a document node; all keywords are passed on to L{Node}."""
        document_type = xml.dom.Node.DOCUMENT_NODE
        super(Document, self).__init__(node_type=document_type, **kw)

    @property
    def documentElement (self):
        """The first child of the document."""
        return self.firstChild
202
class Attr (Node):
    """Attribute node; adds the C{nodeName} and C{nodeValue} interface."""

    def __init__ (self, **kw):
        """Create an attribute node; all keywords are passed on to L{Node}."""
        attribute_type = xml.dom.Node.ATTRIBUTE_NODE
        super(Attr, self).__init__(node_type=attribute_type, **kw)

    # DOM spellings for what Node already exposes as value and name.
    nodeValue = Node.value
    nodeName = Node.name
209
class NamedNodeMap (dict):
    """Implement that portion of NamedNodeMap required to satisfy PyXB's
    needs.

    The dictionary part maps each attribute's C{name} to its C{value}.  The
    attribute nodes themselves are kept, in insertion order, in a private
    list that backs C{length}, L{item} and L{_getAttr}.
    """

    # Class-level default; each instance gets its own list in __init__.
    __members = None

    def __init__ (self):
        super(NamedNodeMap, self).__init__()
        self.__members = []

    # Number of attribute nodes held by the map.
    length = property(lambda _s: len(_s.__members))

    def item (self, index):
        """Return the attribute node at position C{index}.

        @return: The node, or C{None} when C{index} is out of range.  The
        DOM specifies a null result for an out-of-range index, and
        C{xml.dom.minidom} behaves the same way, so callers written against
        either work here without special-casing.
        """
        try:
            return self.__members[index]
        except IndexError:
            return None

    def _addItem (self, attr):
        """Add attribute node C{attr} to the map.

        The node's C{name} and C{value} are recorded in the dictionary and
        the node is appended to the ordered member list.
        """
        self[attr.name] = attr.value
        # Sanity check that the attribute node has a namespace context
        # associated with it.
        assert pyxb.namespace.resolution.NamespaceContext.GetNodeContext(attr) is not None
        self.__members.append(attr)

    def _getAttr (self, name):
        """Return the first attribute node whose C{name} equals C{name}, or
        C{None} if there is none."""
        for attr in self.__members:
            if attr.name == name:
                return attr
        return None
233
class Element (Node):
    """Element node; C{nodeName} is the local name of the element."""

    # DOM spelling for what Node exposes as localName.
    nodeName = Node.localName

    def __init__ (self, **kw):
        """Create an element node; all keywords are passed on to L{Node}.

        An attribute map must have been supplied through the C{attributes}
        keyword.
        """
        element_type = xml.dom.Node.ELEMENT_NODE
        super(Element, self).__init__(node_type=element_type, **kw)
        assert not (self.attributes is None)
239
class _CharacterData (Node):
    """Abstract base for the node types whose content is text."""

    @property
    def data (self):
        """The text held by the node; the same as its C{value}."""
        return self.value
243
class Text (_CharacterData):
    """Text node; the text is stored as the node value."""

    def __init__ (self, text, **kw):
        """Create a text node holding C{text}; remaining keywords are passed
        on to L{Node}."""
        text_type = xml.dom.Node.TEXT_NODE
        super(Text, self).__init__(value=text, node_type=text_type, **kw)
247
class Comment (_CharacterData):
    """Comment node; the comment text is stored as the node value."""

    def __init__ (self, text, **kw):
        """Create a comment node holding C{text}; remaining keywords are
        passed on to L{Node}."""
        # super() has to be anchored on Comment itself.  Anchoring it on
        # Text fails with TypeError, because a Comment instance is not an
        # instance of Text.
        super(Comment, self).__init__(value=text, node_type=xml.dom.Node.COMMENT_NODE, **kw)
if '__main__' == __name__:
    import sys
    # Default sample document; the first command-line argument overrides it.
    xml_file = '/home/pab/pyxb/dev/examples/tmsxtvd/tmsdatadirect_sample.xml'
    if 1 < len(sys.argv):
        xml_file = sys.argv[1]

    # open() rather than the file() builtin, and try/finally so the handle
    # is closed even when parsing raises.
    xml_stream = open(xml_file)
    try:
        doc = parse(xml_stream)
    finally:
        xml_stream.close()

## Local Variables:
## fill-column:78
## End: