Package pyxb :: Package binding :: Module saxer
[hide private]
[frames | no frames]

Source Code for Module pyxb.binding.saxer

  1  # Copyright 2009, Peter A. Bigot 
  2  # 
  3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  4  # not use this file except in compliance with the License. You may obtain a 
  5  # copy of the License at: 
  6  # 
  7  #            http://www.apache.org/licenses/LICENSE-2.0 
  8  # 
  9  # Unless required by applicable law or agreed to in writing, software 
 10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 12  # License for the specific language governing permissions and limitations 
 13  # under the License. 
 14   
 15  """This module contains support for generating bindings from an XML stream 
 16  using a SAX parser.""" 
 17   
 18  import xml.sax 
 19  import xml.sax.handler 
 20  import pyxb.namespace 
 21  import pyxb.utils.saxutils 
 22  import pyxb.utils.saxdom 
 23  import pyxb.utils.utility 
 24  import basis 
 25  from pyxb.namespace.builtin import XMLSchema_instance as XSI 
 26   
class _SAXElementState (pyxb.utils.saxutils.SAXElementState):
    """State required to generate bindings for a specific element.

    If the document being parsed includes references to unrecognized elements,
    a DOM instance of the element and its content is created and treated as a
    wildcard element.
    """

    # NOTE: Python strips leading underscores from the class name when it
    # mangles private names, so every C{__name} attribute in this class gets
    # the same C{_SAXElementState__} prefix that the base class
    # C{SAXElementState} uses for its own private names.  Go through the
    # public accessors (C{content()}, C{namespaceContext()}, ...) for base
    # class state rather than relying on that coincidence.

    # An expanded name corresponding to xsi:nil
    __XSINilTuple = XSI.nil.uriTuple()

    # The binding instance being created for this element.  When the
    # element type has simple content, the binding instance cannot be
    # created until the end of the element has been reached and the
    # content of the element has been accumulated for use in the
    # instance constructor.  When the element type has complex
    # content, the binding instance must be created at the start of
    # the element, so contained elements can be properly stored.
    __bindingInstance = None

    # The schema binding for the element being constructed.
    __elementBinding = None

    def setElementBinding (self, element_binding):
        """Record the binding to be used for this element.

        Generally ignored, except at the top level this is the only way to
        associate a binding instance created from an xsi:type description with
        a specific element."""
        self.__elementBinding = element_binding

    # The nearest enclosing complex type definition
    def enclosingCTD (self):
        """The nearest enclosing complex type definition, as used for
        resolving local element/attribute names.

        @return: An instance of L{basis.complexTypeDefinition}, or C{None} if
        the element is top-level
        """
        return self.__enclosingCTD
    __enclosingCTD = None

    # The factory that is called to create a binding instance for this
    # element; None if the binding instance was created at the start
    # of the element.
    __delayedConstructor = None

    # An xml.sax.xmlreader.Attributes instance providing the
    # attributes for the element.  (In DOM mode this is replaced by a
    # pyxb.utils.saxdom.NamedNodeMap; see startDOMElement.)
    __attributes = None

    # An xml.dom.Node corresponding to the (sub-)document
    __domDocument = None

    # Depth counter used while in DOM mode; None when not in DOM mode.
    __domDepth = None

    def __init__ (self, **kw):
        """Create the state for one element.

        The enclosing complex type definition and any active DOM-mode state
        are inherited from the parent state when that parent is itself a
        L{_SAXElementState}.

        @param kw: Keywords passed unchanged to the superclass constructor.
        """
        super(_SAXElementState, self).__init__(**kw)
        self.__bindingInstance = None
        parent_state = self.parentState()
        if isinstance(parent_state, _SAXElementState):
            self.__enclosingCTD = parent_state.enclosingCTD()
            self.__domDocument = parent_state.__domDocument
            if self.__domDocument is not None:
                self.__domDepth = parent_state.__domDepth + 1

    def setEnclosingCTD (self, enclosing_ctd):
        """Set the enclosing complex type definition for this element.

        @param enclosing_ctd: The scope for a local element.
        @type enclosing_ctd: L{basis.complexTypeDefinition}
        @return: C{self}
        """
        self.__enclosingCTD = enclosing_ctd
        # The documented contract promises self; honor it.
        return self

    # Create the binding instance for this element.
    def __constructElement (self, new_object_factory, attrs, content=None):
        """Create, configure and record the binding instance for this element.

        @param new_object_factory: Callable invoked as
        C{new_object_factory(*content, **kw)} to create the instance.
        @param attrs: The SAX attributes of the element.
        @param content: Optional list of positional constructor arguments;
        C{None} is treated as an empty list.
        @return: The newly created binding instance.
        """
        kw = { '_from_xml' : True }

        # Note whether the node is marked nil
        if attrs.has_key(self.__XSINilTuple):
            kw['_nil'] = pyxb.binding.datatypes.boolean(attrs.getValue(self.__XSINilTuple))

        if content is None:
            content = []
        self.__bindingInstance = new_object_factory(*content, **kw)
        if isinstance(self.__bindingInstance, pyxb.utils.utility.Locatable_mixin):
            self.__bindingInstance._setLocation(self.location())

        # Record the namespace context so users of the binding can
        # interpret QNames within the attributes and content.
        self.__bindingInstance._setNamespaceContext(self.namespaceContext())

        # Set the attributes.
        if isinstance(self.__bindingInstance, pyxb.binding.basis.complexTypeDefinition):
            # NB: attrs implements the SAX AttributesNS interface, meaning
            # that names are pairs of (namespaceURI, localName), just like we
            # want them to be.
            for attr_name in attrs.getNames():
                attr_en = pyxb.namespace.ExpandedName(attr_name)
                # Ignore xmlns and xsi attributes; we've already handled those
                if attr_en.namespace() in ( pyxb.namespace.XMLNamespaces, XSI ):
                    continue
                self.__bindingInstance._setAttribute(attr_en, attrs.getValue(attr_name))

        return self.__bindingInstance

    def inDOMMode (self):
        """Return C{True} iff this element is being collected as part of a
        DOM subtree rather than converted to a binding instance."""
        return self.__domDocument is not None

    def enterDOMMode (self, attrs):
        """Actions upon first encountering an element for which we cannot create a binding.

        Invoking this transitions the parser into DOM mode, creating a new DOM
        document that will represent this element including its content.

        @param attrs: The SAX attributes of the element.
        @return: The result of L{startDOMElement}.
        """
        assert not self.__domDocument
        self.__domDocument = pyxb.utils.saxdom.Document(namespace_context=self.namespaceContext())
        self.__domDepth = 0
        return self.startDOMElement(attrs)

    def startDOMElement (self, attrs):
        """Actions upon entering an element that is part of a DOM subtree.

        The SAX attributes are converted into a
        L{pyxb.utils.saxdom.NamedNodeMap} that is attached to the DOM element
        when L{endDOMElement} is invoked.

        @param attrs: The SAX attributes of the element.
        """
        self.__domDepth += 1
        self.__attributes = pyxb.utils.saxdom.NamedNodeMap()
        ns_ctx = self.namespaceContext()
        for name in attrs.getNames():
            attr_en = pyxb.namespace.ExpandedName(name)
            self.__attributes._addItem(
                pyxb.utils.saxdom.Attr(expanded_name=attr_en,
                                       namespace_context=ns_ctx,
                                       value=attrs.getValue(name),
                                       location=self.location()))

    def endDOMElement (self):
        """Actions upon leaving an element that is part of a DOM subtree.

        Builds the DOM element from the recorded attributes and accumulated
        content, leaves DOM mode if the depth counter returns to zero, and
        adds the element to the parent state's content.

        @return: The L{pyxb.utils.saxdom.Element} that was created.
        """
        # xml.dom is not imported at module level (only xml.sax is), so make
        # sure the submodule is loaded instead of relying on another module
        # having imported it as a side effect.
        import xml.dom

        ns_ctx = self.namespaceContext()
        element = pyxb.utils.saxdom.Element(namespace_context=ns_ctx,
                                            expanded_name=self.expandedName(),
                                            attributes=self.__attributes,
                                            location=self.location())
        for ( content, element_use, maybe_element ) in self.content():
            if isinstance(content, xml.dom.Node):
                element.appendChild(content)
            else:
                # Anything that is not already a node is wrapped as text.
                element.appendChild(pyxb.utils.saxdom.Text(content, namespace_context=ns_ctx))
        self.__domDepth -= 1
        if 0 == self.__domDepth:
            # This was the element that entered DOM mode: complete the
            # document and return to binding mode.
            self.__domDocument.appendChild(element)
            self.__domDepth = None
            self.__domDocument = None
        parent_state = self.parentState()
        parent_state.addElementContent(element, None)
        return element

    def startBindingElement (self, type_class, new_object_factory, element_use, attrs):
        """Actions upon entering an element that will produce a binding instance.

        The element use is recorded.  If the type is a subclass of
        L{basis.simpleTypeDefinition}, a delayed constructor is recorded so
        the binding instance can be created upon completion of the element;
        otherwise, a binding instance is created and stored.  The attributes
        are used to initialize the binding instance (now, or upon element
        end).

        @param type_class: The Python type of the binding instance
        @type type_class: subclass of L{basis._TypeBinding_mixin}
        @param new_object_factory: A callable object that creates an instance of the C{type_class}
        @param element_use: The element use with which the binding instance is associated.  Will be C{None} for top-level elements
        @type element_use: L{basis.element}
        @param attrs: The XML attributes associated with the element
        @type attrs: C{xml.sax.xmlreader.Attributes}
        @return: The generated binding instance, or C{None} if creation is delayed
        """
        self.__delayedConstructor = None
        self.__elementUse = element_use
        self.__attributes = attrs
        if type_class._IsSimpleTypeContent():
            # Construction must wait until the text content is available.
            self.__delayedConstructor = new_object_factory
        else:
            self.__constructElement(new_object_factory, attrs)
        return self.__bindingInstance

    def endBindingElement (self):
        """Perform any end-of-element processing.

        For simple type instances, this creates the binding instance.  For
        other instances the accumulated content is appended to the instance
        created at the start of the element.  The instance is then added to
        the parent state's content (if there is a parent), associated with
        the recorded element binding if it has none, and validated when
        C{pyxb._ParsingRequiresValid} is set.

        @return: The generated binding instance
        """
        if self.__delayedConstructor is not None:
            args = []
            for (content, element_use, maybe_element) in self.content():
                assert not maybe_element
                assert element_use is None
                assert isinstance(content, basestring)
                args.append(content)
            assert 1 >= len(args), 'Unexpected STD content %s' % (args,)
            self.__constructElement(self.__delayedConstructor, self.__attributes, args)
        else:
            for (content, element_use, maybe_element) in self.content():
                self.__bindingInstance.append(content, element_use, maybe_element, require_validation=pyxb._ParsingRequiresValid)
        parent_state = self.parentState()
        if parent_state is not None:
            parent_state.addElementContent(self.__bindingInstance, self.__elementUse)
        # As CreateFromDOM does, validate the resulting element
        if self.__bindingInstance._element() is None:
            self.__bindingInstance._setElement(self.__elementBinding)
        if pyxb._ParsingRequiresValid:
            self.__bindingInstance.validateBinding()
        return self.__bindingInstance
234
class PyXBSAXHandler (pyxb.utils.saxutils.BaseSAXHandler):
    """A SAX content handler that builds a binding instance for a document
    while it is being read by a streaming parser.

    An example of using this to parse the document held in the string
    C{xmls} is::

      import pyxb.binding.saxer
      import StringIO

      saxer = pyxb.binding.saxer.make_parser()
      handler = saxer.getContentHandler()
      saxer.parse(StringIO.StringIO(xmls))
      instance = handler.rootObject()

    """

    # Whether invocation of handler methods should be traced
    __trace = False

    # An expanded name corresponding to xsi:type
    __XSITypeTuple = XSI.type.uriTuple()

    # NOTE(review): the two attributes below are not referenced by any
    # method of this class.
    __domHandler = None
    __domDepth = None

    # The object built for the outermost element of the document; None
    # until one has been created.
    __rootObject = None

    def rootObject (self):
        """Return the binding object corresponding to the top-most
        element in the document.

        @return: An instance of L{basis._TypeBinding_mixin} (most usually a
        L{basis.complexTypeDefinition}).

        @raise pyxb.UnrecognizedElementError: No binding could be found to
        match the top-level element in the document."""
        root = self.__rootObject
        if isinstance(root, basis._TypeBinding_mixin):
            return root
        # Happens if the top-level element got processed as a DOM instance.
        raise pyxb.UnrecognizedElementError(dom_node=root)

    def reset (self):
        """Discard per-document state so the handler can process another
        document.

        @return: C{self}
        """
        super(PyXBSAXHandler, self).reset()
        self.__rootObject = None
        return self

    def __init__ (self, **kw):
        """Create a handler for converting XML to bindings.

        @keyword element_state_constructor: Defaults to
        L{_SAXElementState} when the caller does not supply one; the
        keywords are then passed to the L{superclass
        constructor<pyxb.utils.saxutils.BaseSAXHandler.__init__>}.
        """
        if 'element_state_constructor' not in kw:
            kw['element_state_constructor'] = _SAXElementState
        super(PyXBSAXHandler, self).__init__(**kw)
        self.reset()

    def startElementNS (self, name, qname, attrs):
        """SAX callback invoked at the start of each element.

        Determines the element binding and type for the element, switching
        the element state into DOM mode when no type can be determined, and
        otherwise starting construction of the binding instance.

        @param name: The C{(namespaceURI, localName)} pair for the element
        @param qname: The qualified name of the element as it appeared
        @param attrs: The attributes of the element
        """
        state_info = super(PyXBSAXHandler, self).startElementNS(name, qname, attrs)
        (this_state, parent_state, ns_ctx, name_en) = state_info

        # Everything below an unrecognized element is collected as DOM.
        if this_state.inDOMMode():
            return this_state.startDOMElement(attrs)

        # Look the element up in the scope of the parent's complex type
        # if there is one.  Global elements have a binding but no use.
        scope = parent_state.enclosingCTD()
        if scope is None:
            use = None
            binding = name_en.elementBinding()
        else:
            (binding, use) = scope._ElementBindingUseForName(name_en)
        this_state.setElementBinding(binding)

        # A non-root element with a use but no binding takes the binding
        # from the use.  (An existing binding is kept, since it may be a
        # member of a substitution group.)
        if (binding is None) and (use is not None):
            assert self.__rootObject is not None
            binding = use.elementBinding()
            assert binding is not None

        # The type is unknown unless a binding supplies one...
        type_class = None
        if binding is not None:
            binding = binding.elementForName(name)
            type_class = binding.typeDefinition()

        # ...or an xsi:type attribute names one.
        xsi_type = self.__XSITypeTuple
        if attrs.has_key(xsi_type):
            (did_replace, type_class) = XSI._InterpretTypeAttribute(attrs.getValue(xsi_type), ns_ctx, None, type_class)
            if did_replace:
                binding = None

        if type_class is None:
            # We don't know what this thing is.  That is not an error if
            # the schema accepts wildcards.  For consistency with the
            # DOM-based interface, build a DOM node.
            return this_state.enterDOMMode(attrs)

        # Prefer the binding's __call__ method over Factory so abstract
        # elements can be checked.
        new_object_factory = binding
        if new_object_factory is None:
            new_object_factory = type_class.Factory

        # A complex type becomes the scope for this element's children;
        # anything else leaves the parent's scope in effect.
        assert type_class is not None
        if issubclass(type_class, pyxb.binding.basis.complexTypeDefinition):
            child_scope = type_class
        else:
            child_scope = parent_state.enclosingCTD()
        this_state.setEnclosingCTD(child_scope)

        # This may or may not produce a binding object.
        binding_object = this_state.startBindingElement(type_class, new_object_factory, use, attrs)

        # If the top-level element has complex content, this sets the
        # root object.  If it has simple content, see endElementNS.
        if self.__rootObject is None:
            self.__rootObject = binding_object

    def endElementNS (self, name, qname):
        """SAX callback invoked at the end of each element.

        @param name: The C{(namespaceURI, localName)} pair for the element
        @param qname: The qualified name of the element as it appeared
        """
        this_state = super(PyXBSAXHandler, self).endElementNS(name, qname)
        if not this_state.inDOMMode():
            # Yields the binding object created either at the start or
            # at the end of the element.
            finished = this_state.endBindingElement()
        else:
            # Completing this element may take us out of DOM mode.  In
            # any case, the result is a DOM element instance.
            finished = this_state.endDOMElement()
        assert finished is not None

        # Saving the root on element close is safe.  A complex-content
        # root was already recorded at element start, and a
        # simple-content root has no internal elements that could have
        # set this first.
        if self.__rootObject is None:
            self.__rootObject = finished
385
def make_parser (*args, **kw):
    """Wrap L{pyxb.utils.saxutils.make_parser} so that, unless the caller
    supplies a C{content_handler_constructor} keyword, L{PyXBSAXHandler} is
    used as the content handler constructor.

    @return: Whatever L{pyxb.utils.saxutils.make_parser} returns for the
    given arguments.
    """
    if 'content_handler_constructor' not in kw:
        kw['content_handler_constructor'] = PyXBSAXHandler
    return pyxb.utils.saxutils.make_parser(*args, **kw)
392 393 ## Local Variables: 394 ## fill-column:78 395 ## End: 396