Package pyxb :: Package binding :: Module saxer
[hide private]
[frames | no frames]

Source Code for Module pyxb.binding.saxer

  1  # Copyright 2009, Peter A. Bigot 
  2  # 
  3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  4  # not use this file except in compliance with the License. You may obtain a 
  5  # copy of the License at: 
  6  # 
  7  #            http://www.apache.org/licenses/LICENSE-2.0 
  8  # 
  9  # Unless required by applicable law or agreed to in writing, software 
 10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 12  # License for the specific language governing permissions and limitations 
 13  # under the License. 
 14   
 15  """This module contains support for generating bindings from an XML stream 
 16  using a SAX parser.""" 
 17   
 18  import xml.sax 
 19  import xml.sax.handler 
 20  import pyxb.namespace 
 21  import pyxb.utils.saxutils 
 22  import pyxb.utils.saxdom 
 23  import pyxb.utils.utility 
 24  import basis 
 25  from pyxb.namespace.builtin import XMLSchema_instance as XSI 
 26   
class _SAXElementState (pyxb.utils.saxutils.SAXElementState):
    """State required to generate bindings for a specific element.

    If the document being parsed includes references to unrecognized elements,
    a DOM instance of the element and its content is created and treated as a
    wildcard element.
    """

    # An expanded name corresponding to xsi:nil
    __XSINilTuple = XSI.nil.uriTuple()

    # The binding object being created for this element.  When the
    # element type has simple content, the binding instance cannot be
    # created until the end of the element has been reached and the
    # content of the element has been processed and accumulated for use
    # in the instance constructor.  When the element type has complex
    # content, the binding instance must be created at the start of
    # the element, so contained elements can be properly stored.
    __bindingObject = None

    # The nearest enclosing complex type definition
    def enclosingCTD (self):
        """The nearest enclosing complex type definition, as used for
        resolving local element/attribute names.

        @return: An instance of L{basis.complexTypeDefinition}, or C{None} if
        the element is top-level
        """
        return self.__enclosingCTD
    __enclosingCTD = None

    # The factory that is called to create a binding instance for this
    # element; None if the binding instance was created at the start
    # of the element.
    __delayedConstructor = None

    # An xml.sax.xmlreader.Attributes instance providing the
    # attributes for the element.  (In DOM mode this instead holds the
    # pyxb.utils.saxdom.NamedNodeMap built by startDOMElement.)
    __attributes = None

    # An xml.dom.Node corresponding to the (sub-)document
    __domDocument = None

    # Nesting depth within the DOM subtree; None when not in DOM mode.
    __domDepth = None

    def __init__ (self, **kw):
        """Create the state for one element.

        Keyword arguments are passed unchanged to the superclass
        constructor.  When the parent state is also a L{_SAXElementState},
        its enclosing complex type definition and any in-progress DOM
        document are inherited, so that descendants of an unrecognized
        element are also processed in DOM mode.
        """
        super(_SAXElementState, self).__init__(**kw)
        self.__bindingObject = None
        parent_state = self.parentState()
        if isinstance(parent_state, _SAXElementState):
            self.__enclosingCTD = parent_state.enclosingCTD()
            self.__domDocument = parent_state.__domDocument
            if self.__domDocument is not None:
                self.__domDepth = parent_state.__domDepth + 1

    def setEnclosingCTD (self, enclosing_ctd):
        """Set the enclosing complex type definition for this element.

        @param enclosing_ctd: The scope for a local element.
        @type enclosing_ctd: L{basis.complexTypeDefinition}
        @return: C{self}
        """
        self.__enclosingCTD = enclosing_ctd
        return self

    # Create the binding instance for this element.
    def __constructElement (self, new_object_factory, attrs, content=None):
        """Invoke the factory and initialize the resulting binding instance.

        @param new_object_factory: Callable that creates the binding instance
        @param attrs: The SAX attributes of the element
        @param content: Optional list of positional arguments passed to the
        factory; C{None} is treated as an empty list
        @return: The newly created binding instance
        """
        kw = {}

        # Note whether the node is marked nil
        if attrs.has_key(self.__XSINilTuple):
            kw['_nil'] = pyxb.binding.datatypes.boolean(attrs.getValue(self.__XSINilTuple))

        if content is None:
            content = []
        self.__bindingObject = new_object_factory(*content, **kw)
        if isinstance(self.__bindingObject, pyxb.utils.utility.Locatable_mixin):
            self.__bindingObject._setLocation(self.location())

        # Record the namespace context so users of the binding can
        # interpret QNames within the attributes and content.  Use the
        # public accessor (as the DOM-mode methods do) rather than
        # depending on private-name mangling to reach state that this
        # class never assigns itself.
        self.__bindingObject._setNamespaceContext(self.namespaceContext())

        # Set the attributes.
        if isinstance(self.__bindingObject, pyxb.binding.basis.complexTypeDefinition):
            # NB: attrs implements the SAX AttributesNS interface, meaning
            # that names are pairs of (namespaceURI, localName), just like we
            # want them to be.
            for attr_name in attrs.getNames():
                attr_en = pyxb.namespace.ExpandedName(attr_name)
                # Ignore xmlns and xsi attributes; we've already handled those
                if attr_en.namespace() in ( pyxb.namespace.XMLNamespaces, XSI ):
                    continue
                self.__bindingObject._setAttribute(attr_en, attrs.getValue(attr_name))

        return self.__bindingObject

    def inDOMMode (self):
        """Return C{True} iff this element is being captured as part of a
        DOM subtree rather than as a binding instance."""
        return self.__domDocument is not None

    def enterDOMMode (self, attrs):
        """Actions upon first encountering an element for which we cannot create a binding.

        Invoking this transitions the parser into DOM mode, creating a new DOM
        document that will represent this element including its content.

        @param attrs: The SAX attributes of the element
        @return: The result of L{startDOMElement}
        """
        assert not self.__domDocument
        self.__domDocument = pyxb.utils.saxdom.Document(namespace_context=self.namespaceContext())
        self.__domDepth = 0
        return self.startDOMElement(attrs)

    def startDOMElement (self, attrs):
        """Actions upon entering an element that is part of a DOM subtree.

        Converts the SAX attributes into a DOM attribute map that is held
        until L{endDOMElement} builds the element node.

        @param attrs: The SAX attributes of the element
        """
        self.__domDepth += 1
        self.__attributes = pyxb.utils.saxdom.NamedNodeMap()
        ns_ctx = self.namespaceContext()
        for name in attrs.getNames():
            attr_en = pyxb.namespace.ExpandedName(name)
            self.__attributes._addItem(pyxb.utils.saxdom.Attr(expanded_name=attr_en, namespace_context=ns_ctx, value=attrs.getValue(name), location=self.location()))

    def endDOMElement (self):
        """Actions upon leaving an element that is part of a DOM subtree.

        Builds the DOM element from the accumulated attributes and content,
        attaches it to the DOM document (leaving DOM mode) when the depth
        counter returns to zero, and records it as content of the parent
        state.

        @return: The DOM element created for this element
        """
        # xml.dom is not among this module's top-level imports; import it
        # here so the isinstance check below does not depend on some other
        # module having loaded it first.
        import xml.dom

        ns_ctx = self.namespaceContext()
        element = pyxb.utils.saxdom.Element(namespace_context=ns_ctx, expanded_name=self.expandedName(), attributes=self.__attributes, location=self.location())
        for ( content, element_use, maybe_element ) in self.content():
            if isinstance(content, xml.dom.Node):
                element.appendChild(content)
            else:
                # Anything that is not already a node is wrapped as text.
                element.appendChild(pyxb.utils.saxdom.Text(content, namespace_context=ns_ctx))
        self.__domDepth -= 1
        if 0 == self.__domDepth:
            # This element is the one that entered DOM mode: finish the
            # document and return this state to binding mode.
            self.__domDocument.appendChild(element)
            self.__domDepth = None
            self.__domDocument = None
        parent_state = self.parentState()
        parent_state.addElementContent(element, None)
        return element

    def startBindingElement (self, type_class, new_object_factory, element_use, attrs):
        """Actions upon entering an element that will produce a binding instance.

        The element use is recorded.  If the type is a subclass of
        L{basis.simpleTypeDefinition}, a delayed constructor is recorded so
        the binding instance can be created upon completion of the element;
        otherwise, a binding instance is created and stored.  The attributes
        are used to initialize the binding instance (now, or upon element
        end).

        @param type_class: The Python type of the binding instance
        @type type_class: subclass of L{basis._TypeBinding_mixin}
        @param new_object_factory: A callable object that creates an instance of the C{type_class}
        @param element_use: The element use with which the binding instance is associated.  Will be C{None} for top-level elements
        @type element_use: L{basis.element}
        @param attrs: The XML attributes associated with the element
        @type attrs: C{xml.sax.xmlreader.Attributes}
        @return: The generated binding instance, or C{None} if creation is delayed
        """
        self.__delayedConstructor = None
        self.__elementUse = element_use
        # Retained for the delayed-construction path in endBindingElement.
        self.__attributes = attrs
        if type_class._IsSimpleTypeContent():
            self.__delayedConstructor = new_object_factory
        else:
            self.__constructElement(new_object_factory, attrs)
        return self.__bindingObject

    def endBindingElement (self):
        """Perform any end-of-element processing.

        For simple type instances, this creates the binding instance.
        Otherwise the accumulated content is appended to the instance
        created at element start.  The instance is then recorded as content
        of the parent state (if any) and, when C{pyxb._ParsingRequiresValid}
        is set, validated.

        @return: The generated binding instance
        """
        if self.__delayedConstructor is not None:
            args = []
            for (content, element_use, maybe_element) in self.content():
                assert not maybe_element
                assert element_use is None
                assert isinstance(content, basestring)
                args.append(content)
            assert 1 >= len(args), 'Unexpected STD content %s' % (args,)
            self.__constructElement(self.__delayedConstructor, self.__attributes, args)
        else:
            for (content, element_use, maybe_element) in self.content():
                self.__bindingObject.append(content, element_use, maybe_element, require_validation=pyxb._ParsingRequiresValid)
        parent_state = self.parentState()
        if parent_state is not None:
            parent_state.addElementContent(self.__bindingObject, self.__elementUse)
        # As CreateFromDOM does, validate the resulting element
        if pyxb._ParsingRequiresValid:
            self.__bindingObject.validateBinding()
        return self.__bindingObject
221
class PyXBSAXHandler (pyxb.utils.saxutils.BaseSAXHandler):
    """A SAX handler class which generates a binding instance for a document
    through a streaming parser.

    An example of using this to parse the document held in the string C{xmls} is::

      import pyxb.binding.saxer
      import StringIO

      saxer = pyxb.binding.saxer.make_parser()
      handler = saxer.getContentHandler()
      saxer.parse(StringIO.StringIO(xmls))
      instance = handler.rootObject()

    """

    # Whether invocation of handler methods should be traced
    __trace = False

    # An expanded name corresponding to xsi:type
    __XSITypeTuple = XSI.type.uriTuple()

    __domHandler = None
    __domDepth = None

    def rootObject (self):
        """Return the binding object corresponding to the top-most
        element in the document

        @return: An instance of L{basis._TypeBinding_mixin} (most usually a
        L{basis.complexTypeDefinition}).

        @raise pyxb.UnrecognizedElementError: No binding could be found to
        match the top-level element in the document."""
        if not isinstance(self.__rootObject, basis._TypeBinding_mixin):
            # Happens if the top-level element got processed as a DOM instance.
            raise pyxb.UnrecognizedElementError(dom_node=self.__rootObject)
        return self.__rootObject
    __rootObject = None

    def reset (self):
        """Reset the state of the handler in preparation for processing a new
        document.

        @return: C{self}
        """
        super(PyXBSAXHandler, self).reset()
        self.__rootObject = None
        return self

    def __init__ (self, **kw):
        """Create a parser instance for converting XML to bindings.

        @keyword element_state_constructor: Defaults to
        L{_SAXElementState} when the caller does not supply a value; the
        keywords are then passed to the L{superclass
        constructor<pyxb.utils.saxutils.BaseSAXHandler.__init__>}.
        """

        kw.setdefault('element_state_constructor', _SAXElementState)
        super(PyXBSAXHandler, self).__init__(**kw)
        self.reset()

    def startElementNS (self, name, qname, attrs):
        """SAX callback for the start of a namespace-qualified element.

        Determines the element binding and type for the element, then
        either starts construction of a binding instance or, if no type
        can be determined, switches the element state into DOM mode.

        @param name: The C{(namespaceURI, localName)} pair for the element
        @param qname: The qualified name as it appeared in the document
        @param attrs: The SAX attributes of the element
        """
        (this_state, parent_state, ns_ctx, name_en) = super(PyXBSAXHandler, self).startElementNS(name, qname, attrs)

        # Delegate processing if in DOM mode
        if this_state.inDOMMode():
            return this_state.startDOMElement(attrs)

        # Resolve the element within the appropriate context.  Note
        # that global elements have no use, only the binding.
        enclosing_ctd = parent_state.enclosingCTD()
        if enclosing_ctd is not None:
            (element_binding, element_use) = enclosing_ctd._ElementBindingUseForName(name_en)
        else:
            element_use = None
            element_binding = name_en.elementBinding()

        # Non-root elements should have an element use, from which we can
        # extract the binding if we couldn't find one elsewhere.  (Keep any
        # current binding, since it may be a member of a substitution group.)
        if (element_use is not None) and (element_binding is None):
            assert self.__rootObject is not None
            element_binding = element_use.elementBinding()
            assert element_binding is not None

        # Start knowing nothing
        type_class = None
        if element_binding is not None:
            element_binding = element_binding.elementForName(name)
            type_class = element_binding.typeDefinition()

        # Process an xsi:type attribute, if present
        if attrs.has_key(self.__XSITypeTuple):
            (did_replace, type_class) = XSI._InterpretTypeAttribute(attrs.getValue(self.__XSITypeTuple), ns_ctx, None, type_class)
            if did_replace:
                element_binding = None

        if type_class is None:
            # Bother.  We don't know what this thing is.  But that's not an
            # error, if the schema accepts wildcards.  For consistency with
            # the DOM-based interface, we need to build a DOM node.
            return this_state.enterDOMMode(attrs)

        if element_binding is not None:
            # Invoke binding __call__ method not Factory, so can check for
            # abstract elements.
            new_object_factory = element_binding
        else:
            new_object_factory = type_class.Factory

        # Update the enclosing complex type definition for this
        # element state.
        assert type_class is not None
        if issubclass(type_class, pyxb.binding.basis.complexTypeDefinition):
            this_state.setEnclosingCTD(type_class)
        else:
            this_state.setEnclosingCTD(enclosing_ctd)

        # Process the element start.  This may or may not return a
        # binding object.
        binding_object = this_state.startBindingElement(type_class, new_object_factory, element_use, attrs)

        # If the top-level element has complex content, this sets the
        # root object.  If it has simple content, see endElementNS.
        if self.__rootObject is None:
            self.__rootObject = binding_object

    def endElementNS (self, name, qname):
        """SAX callback for the end of a namespace-qualified element.

        Completes the DOM node or binding instance for the element and
        records the document's root object when it is not yet known.

        @param name: The C{(namespaceURI, localName)} pair for the element
        @param qname: The qualified name as it appeared in the document
        """
        this_state = super(PyXBSAXHandler, self).endElementNS(name, qname)
        in_dom_mode = this_state.inDOMMode()
        if in_dom_mode:
            # Delegate processing if in DOM mode.  Note that completing this
            # element may take us out of DOM mode.  In any case, the returned
            # binding object is a DOM element instance.
            binding_object = this_state.endDOMElement()
        else:
            # Process the element end.  This will return a binding object,
            # either the one created at the start or the one created at
            # the end.
            binding_object = this_state.endBindingElement()
        assert binding_object is not None

        # If we don't have a root object, save it.  No, there is not a
        # problem doing this on the close of the element.  If the
        # top-level element has complex content, the object was
        # created on start, and the root object has been assigned.  If
        # it has simple content, then there are no internal elements
        # that could slip in and set this before we get to it here.
        if self.__rootObject is None:
            self.__rootObject = binding_object
        elif in_dom_mode and not isinstance(self.__rootObject, basis._TypeBinding_mixin):
            # The recorded root is itself a DOM node, i.e. one that closed
            # earlier than this element.  Elements close innermost-first, so
            # keep replacing it; the last replacement is the outermost DOM
            # element, which is the node rootObject() should report.
            self.__rootObject = binding_object
371
def make_parser (*args, **kw):
    """Create a SAX parser via L{pyxb.utils.saxutils.make_parser}, using
    L{PyXBSAXHandler} as the C{content_handler_constructor} unless the
    caller supplies one.

    All positional and keyword arguments are forwarded unchanged.

    @return: Whatever L{pyxb.utils.saxutils.make_parser} returns
    """
    # Only fill in the handler constructor when the caller left it unset.
    if 'content_handler_constructor' not in kw:
        kw['content_handler_constructor'] = PyXBSAXHandler
    parser = pyxb.utils.saxutils.make_parser(*args, **kw)
    return parser
378 379 ## Local Variables: 380 ## fill-column:78 381 ## End: 382