Package pyxb :: Package utils :: Module utility
[hide private]
[frames | no frames]

Source Code for Module pyxb.utils.utility

   1  # Copyright 2009, Peter A. Bigot 
   2  # 
   3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
   4  # not use this file except in compliance with the License. You may obtain a 
   5  # copy of the License at: 
   6  # 
   7  #            http://www.apache.org/licenses/LICENSE-2.0 
   8  # 
   9  # Unless required by applicable law or agreed to in writing, software 
  10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
  11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
  12  # License for the specific language governing permissions and limitations 
  13  # under the License. 
  14   
  15  """Utility functions and classes.""" 
  16   
  17  import re 
  18  import os 
  19  import errno 
  20  import pyxb 
  21   
  22  # Import utility routines that are not from PyXB so have distinct 
  23  # licensing. 
  24  from pyxb.utils.activestate import * 
  25   
def QuotedEscaped (s):
    """Convert a string into a literal value that can be used in Python source.

    This just calls C{repr}.  No point in getting all complex when the
    language already gives us what we need.

    @param s: the value to be rendered as a Python source literal

    @rtype: C{str}
    """
    return repr(s)
35
def _DefaultXMLIdentifierToPython (identifier):
    """Default implementation for _XMLIdentifierToPython

    For historical reasons, this converts the identifier from a str to
    unicode in the system default encoding.  This should have no
    practical effect.

    @param identifier : some XML identifier

    @return: C{unicode(identifier)}
    """
    # NOTE: C{unicode} is the Python 2 builtin; this module targets Python 2.
    return unicode(identifier)
49
def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier.

    In Python3, identifiers can be full Unicode tokens, but in Python2,
    all identifiers must be ASCII characters.  L{MakeIdentifier} enforces
    this by removing all characters that are not valid within an
    identifier.

    In some cases, an application generating bindings may be able to
    transliterate Unicode code points that are not valid Python identifier
    characters into something else.  This callable can be assigned to
    perform that translation before the invalid characters are
    stripped.

    It is not the responsibility of this callable to do anything other
    than replace whatever characters it wishes to.  All
    transformations performed by L{MakeIdentifier} will still be
    applied, to ensure the output is in fact a legal identifier.

    @param xml_identifier_to_python : A callable that takes a string
    and returns a Unicode, possibly with non-identifier characters
    replaced by other characters.  Pass C{None} to reset to the
    default implementation, which is L{_DefaultXMLIdentifierToPython}.

    @return: C{None}; the module-level C{_XMLIdentifierToPython} hook
    is rebound as a side effect.
    """
    global _XMLIdentifierToPython
    if xml_identifier_to_python is None:
        xml_identifier_to_python = _DefaultXMLIdentifierToPython
    _XMLIdentifierToPython = xml_identifier_to_python

# The callable MakeIdentifier applies to an XML identifier before any
# other processing.  Rebound by _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Characters that MakeIdentifier replaces with an underscore.
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Characters that are not legal in an (ASCII) Python identifier.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# A run of underscores at the start of the string.
_PrefixUnderscore_re = re.compile(r'^_+')
# A run of digits at the start of the string.
_PrefixDigit_re = re.compile(r'^\d+')
# An underscore followed by a word character; used for camel-casing.
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Convert a string into something suitable to be a Python identifier.

    The string is processed by L{_XMLIdentifierToPython}.  Following
    this, dashes, spaces, and periods are replaced by underscores, and
    characters not permitted in Python identifiers are stripped.
    Furthermore, any leading underscores are removed.  If the result
    begins with a digit, the character 'n' is prepended.  If the
    result is the empty string, the string 'emptyString' is
    substituted.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @param s: the string to convert

    @keyword camel_case : If C{True}, any underscore in the result
    string that is immediately followed by an alphanumeric is replaced
    by the capitalized version of that alphanumeric.  Thus,
    'one_or_two' becomes 'oneOrTwo'.  If C{False} (default), has no
    effect.

    @rtype: C{str}
    """
    s = _XMLIdentifierToPython(s)
    # Order matters: substitute first, so the separators survive the
    # stripping of non-identifier characters.
    s = _UnderscoreSubstitute_re.sub('_', s)
    s = _NonIdentifier_re.sub('', s)
    s = _PrefixUnderscore_re.sub('', s)
    if camel_case:
        # Replace "_x" with "X"
        s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s)
    if _PrefixDigit_re.match(s):
        s = 'n' + s
    if 0 == len(s):
        s = 'emptyString'
    return s

_PythonKeywords = frozenset( (
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield"
  ) )
"""Python keywords.  Note that types like int and float are not
keywords.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset( (
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
  ) )
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """If the provided string C{s} matches a Python language keyword,
    append an underscore to distinguish them.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional iterable of additional strings
    that should be treated as keywords.

    @return: C{s} with a single underscore appended if it is reserved,
    otherwise C{s} unchanged.

    @rtype: C{str}
    """
    if (s in _Keywords) or (s in aux_keywords):
        return '%s_' % (s,)
    return s
159
def MakeUnique (s, in_use):
    """Return an identifier based on C{s} that is not in the given set.

    The returned identifier is made unique by appending an underscore
    and, if necessary, a serial number.

    The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...

    @param s: the preferred identifier

    @param in_use: The set of identifiers already in use in the
    relevant scope.  C{in_use} is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    if s in in_use:
        # Drop trailing underscores so the candidates are x_, x_2, ...
        # rather than x__, x__2, ...
        base = s.rstrip('_')
        candidate = '%s_' % (base,)
        serial = 2
        while candidate in in_use:
            candidate = '%s_%d' % (base, serial)
            serial += 1
        s = candidate
    in_use.add(s)
    return s
184
def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    Leading and trailing underscores are stripped from all
    identifiers.

    @param s: the string from which the identifier is derived

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is C{True}.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    identifier = MakeIdentifier(s).strip('_')
    identifier = DeconflictKeyword(identifier, aux_keywords)
    if private:
        identifier = '__' + identifier
    elif protected:
        identifier = '_' + identifier
    return MakeUnique(identifier, in_use)
220 221 # @todo: descend from pyxb.cscRoot, if we import pyxb
class _DeconflictSymbols_mixin (object):
    """Mix-in used to deconflict public symbols in classes that may be
    inherited by generated binding classes.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  It is possible that an XML schema might
    include tags and attribute names that match these symbols.  To
    avoid conflict, the reserved symbols marked in this class are
    added to the pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting runs of spaces.  It also matches a single
# space, but replacing one space with one space is a no-op.
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Normalize the given string.

    Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
    parameters must be assigned the value C{True} by the caller.  If
    more than one is set, they take precedence in that order.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: all tabs, newlines, and carriage returns are
     replaced with ASCII spaces.

     - C{collapse}: the C{replace} normalization is done, then
     sequences of two or more spaces are replaced by a single space,
     and leading and trailing whitespace is removed.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @param text: the string to normalize

    @raise Exception: if none of the three keywords is true

    @rtype: C{str}
    """
    if preserve:
        return text
    # Both remaining modes start from the "replace" normalization.
    text = __TabCRLF_re.sub(' ', text)
    if replace:
        return text
    if collapse:
        return __MultiSpace_re.sub(' ', text).strip()
    # pyxb not imported here; could be.
    raise Exception('NormalizeWhitespace: No normalization specified')
286
class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes must be hashable.  Computed results (roots, strongly
    connected components, DFS order) are cached; adding nodes or edges
    does not invalidate them, so pass C{reset=True} to the relevant
    accessor after changing the graph.
    """

    # Class-level defaults for values that are computed lazily; C{None}
    # means "not yet calculated".
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: if not C{None}, a node that is recorded as the
        (initial) root of the graph.  The node is also added to the
        graph, as is done by L{addRoot}.
        """
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()
        self.__roots = None
        if root is not None:
            self.__roots = set([root])
            # Keep the node set consistent with the root set; otherwise
            # dfsOrder() on a graph holding only its root would fail
            # its coverage check.
            self.__nodes.add(root)

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop does not count as an incoming edge, so it does not
        # prevent a node from being a root.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any nodes that had been manually added
        using L{addRoot} will no longer be in the set.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if n not in self.__reverseMap:
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        Only nodes reachable from the L{roots} are visited.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False}, an existing previous result is
        left unchanged.

        @raise Exception: if the graph has nodes but no roots."""

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        # An index of None marks a node that has not been visited.
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Unvisited: recurse, then inherit its low link
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__stack:
                # Still on the stack, so part of the current component
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            # Otherwise the target belongs to a component that has
            # already been completed; nothing to do.

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the entry point of a component: everything above it
            # on the stack belongs to that component.
            scc = []
            while True:
                scc.append(self.__stack.pop())
                if v == scc[-1]:
                    break
            self.__sccOrder.append(scc)
            # Only multi-node components are recorded as "loops"
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph.

        The data structure is a list, each element of which is itself a
        list containing two or more nodes from the graph; components
        consisting of a single node are not included.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a set earlier in the list indicates that it has
        no dependencies on any node that appears in a subsequent set.
        This order is preferred over L{dfsOrder} for code generation,
        since it detects loops.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node, None)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        Computed as edges - nodes + 2 * (number of multi-node strongly
        connected components)."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        """Post-order walk from C{source}, appending to the DFS order."""
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if target not in self.__dfsWalked:
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a GraphViz DOT description of the graph as a string.

        @keyword title: name given to the digraph

        @keyword labeller: optional callable that maps a node to its
        label; C{str} is used when it is C{None}.  Self-loops are not
        emitted.
        """
        # DOT node identifiers are small integers, starting at one
        node_map = { }
        for (idx, n) in enumerate(self.__nodes, 1):
            node_map[n] = idx
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and recompute the order.

        @raise Exception: if the walk from the roots does not reach
        every node in the graph."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

# Default map of URI prefix to replacement prefix used by
# NormalizeLocation.  Mutated in place (never rebound), so existing
# references to it observe updates.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Set the map that is used by L{NormalizeLocation} to rewrite URI prefixes.

    @param prefix_map: a mapping from URI prefix string to the string
    that should replace it.  Its contents replace whatever was
    previously stored; the map itself is copied, not retained.
    """

    LocationPrefixRewriteMap_.clear()
    LocationPrefixRewriteMap_.update(prefix_map)
572
def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    If no URI schema is present, this will normalize a file system
    path.

    Optionally, the resulting absolute URI can subsequently be
    rewritten to replace specified prefix strings with alternative
    strings, e.g. to convert a remote URI to a local repository.  This
    rewriting is done after the conversion to an absolute URI, but
    before normalizing file system URIs.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    @return: the normalized URI or file system path, or C{None} if
    C{uri} is C{None}.
    """
    if uri is None:
        return uri
    if parent_uri is None:
        abs_uri = uri
    else:
        # Only needed when there is something to join against
        import urlparse
        abs_uri = urlparse.urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    for (pfx, sub) in prefix_map.items():
        if abs_uri.startswith(pfx):
            abs_uri = sub + abs_uri[len(pfx):]
    # Anything without a colon has no scheme, so treat it as a path
    if 0 > abs_uri.find(':'):
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri
616 617 import urlparse 618
def TextFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as a text string.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    @param uri: the URI or file path to read

    @keyword archive_directory: if set, and the content was not read
    from a local file, a copy of the content is saved in this
    directory under the base name of the URI path (with a numeric
    suffix if that name is taken).  Failure to save is reported as a
    warning and is otherwise ignored.

    @return: the content read from C{uri}

    @raise Exception: whatever was raised by the first failing attempt
    to open C{uri}, if no attempt succeeded.
    """
    import urllib
    import urllib2
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urllib2.urlopen(uri)
        except Exception as e:
            exc = e
        if stream is None:
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the urllib2 exception as the one to report
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri)
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        print('TextFromURI: open %s caught: %s' % (uri, exc))
        raise exc
    try:
        # Content that came from a local file is not archived.  Protect
        # this in case whatever stream is doesn't have an fp attribute.
        if isinstance(stream, file) or isinstance(stream.fp, file):
            archive_directory = None
    except Exception:
        pass
    try:
        xmls = stream.read()
    finally:
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        # Never overwrite an earlier archived copy
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmls)
            finally:
                archive_fp.close()
        except (OSError, IOError) as e:
            print('WARNING: Unable to save %s in %s: %s' % (uri, dest_file, e))
    return xmls
674
def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write the given file.

    Any missing directories in the path to the file are created.  The
    file is opened in binary append/update mode (C{'ab+'}).

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path to the file to be opened

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file causes an C{OSError} to be raised with C{errno} set to
    C{EEXIST}.  I.e., only files with this value in the first 4KB will
    be returned for writing.  The value is compared against the raw
    bytes of the file.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: if the directory cannot be created, or the tag
    check fails.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is what we wanted anyway
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            # Where an append-mode file is initially positioned varies
            # by platform and Python version; read from the start.
            fp.seek(0)
            text = fp.read(4096)
            if 0 > text.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0) # os.SEEK_SET
            fp.truncate()
        else:
            fp.seek(0, 2) # os.SEEK_END
    except Exception:
        # Don't leak the handle if the file is rejected
        fp.close()
        raise
    return fp

# The callable used by HashForText to create a hash object.  hashlib
# didn't show up until 2.5, and sha is deprecated in 2.6, so prefer
# the former and fall back to the latter.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    @param text: the string to be hashed

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    # Hash functions operate on bytes, not characters
    if isinstance(text, unicode):
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()

# uuid didn't show up until 2.5.  When it is missing, _NewUUIDString
# falls back to the time and random modules.
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import time
    import random

def _NewUUIDString ():
    """Obtain a UUID using the best available method.  On a version of
    python that does not incorporate the C{uuid} class, this creates a
    string combining the current date and time (to the second) with a
    random number.

    @return: the URN form of a time-based UUID when the C{uuid} module
    is available; otherwise a C{YYYYmmddHHMMSS:xxxxxxxx} string.

    @rtype: C{str}
    """
    if __HaveUUID:
        return uuid.uuid1().urn
    return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))
758
class UniqueIdentifier (object):
    """Records a unique identifier, generally associated with a
    binding generation action.

    The identifier is a string, but gets wrapped in an instance of
    this class to optimize comparisons and reduce memory footprint.

    Invoking the constructor for this class on the same string
    multiple times will return the same Python object.

    An instance of this class compares equal to, and hashes equivalent
    to, the uid string.  When C{str}'d, the result is the uid; when
    C{repr}'d, the result is a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier}.
    """

    # A map from UID string to the instance that represents it
    __ExistingUIDs = {}

    # The uid string this instance wraps; assigned once, in __new__
    __uid = None

    # The set of objects associated with this identifier; created the
    # first time the instance is initialized
    __associatedObjects = None

    def uid (self):
        """The string unique identifier"""
        return self.__uid

    # Support pickling, which is done using only the UID.
    def __getnewargs__ (self):
        return (self.__uid,)

    def __getstate__ (self):
        return self.__uid

    def __setstate__ (self, state):
        # __new__ already installed the uid from __getnewargs__
        assert self.__uid == state

    # Singleton-like
    def __new__ (cls, *args):
        """Return the instance for the given uid, creating it if needed.

        @param args: optionally, a single value that is the uid as a
        string or as another L{UniqueIdentifier}.  If absent, a new
        uid is generated.

        @raise TypeError: if the uid is not a string
        """
        if 0 == len(args):
            uid = _NewUUIDString()
        else:
            uid = args[0]
        if isinstance(uid, UniqueIdentifier):
            uid = uid.uid()
        if not isinstance(uid, basestring):
            raise TypeError('UniqueIdentifier uid must be a string')
        rv = cls.__ExistingUIDs.get(uid)
        if rv is None:
            rv = super(UniqueIdentifier, cls).__new__(cls)
            rv.__uid = uid
            cls.__ExistingUIDs[uid] = rv
        return rv

    def associateObject (self, obj):
        """Associate the given object with this identifier.

        This is a one-way association: the object is not provided with
        a return path to this identifier instance."""
        self.__associatedObjects.add(obj)

    def associatedObjects (self):
        """The set of objects that have been associated with this
        identifier instance."""
        return self.__associatedObjects

    def __init__ (self, uid=None):
        """Create a new UniqueIdentifier instance.

        @param uid: The unique identifier string.  If present, it is
        the callers responsibility to ensure the value is universally
        unique.  If C{None}, one will be provided.
        @type uid: C{str} or C{unicode}
        """
        assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
        # Python invokes __init__ every time the constructor is called,
        # even when __new__ returned an existing instance.  Only create
        # the set once, so looking an identifier up again does not
        # discard the objects already associated with it.
        if self.__associatedObjects is None:
            self.__associatedObjects = set()

    def __eq__ (self, other):
        """Compare by uid with another instance or with a string.

        @raise TypeError: if C{other} is neither C{None}, a
        L{UniqueIdentifier}, nor a string.
        """
        if other is None:
            return False
        elif isinstance(other, UniqueIdentifier):
            other_uid = other.uid()
        elif isinstance(other, basestring):
            other_uid = other
        else:
            raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
        return self.uid() == other_uid

    def __ne__ (self, other):
        # Python 2 does not derive != from __eq__; without this, an
        # identifier would be "not equal" to its own uid string.
        return not self.__eq__(other)

    def __hash__ (self):
        return hash(self.uid())

    def __str__ (self):
        return self.uid()

    def __repr__ (self):
        return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)
852 853 import datetime 854 import calendar 855 import time
class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    # Range limits
    __MaxOffset_td = datetime.timedelta(hours=14)

    def __init__ (self, spec=None):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta} giving the offset
        east of UTC, the value C{None} (equal to 0 minutes east), or a
        string that conform to the ISO8601 time zone sequence (B{Z},
        or B{[+-]HH:MM}).

        @raise ValueError: if a string C{spec} is malformed, or the
        offset is more than 14 hours from UTC.

        @raise TypeError: if C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, datetime.timedelta):
                # A negative timedelta is stored as negative days plus
                # positive seconds, so both fields are needed to get
                # the signed offset.
                self.__utcOffset_min = (spec.days * 86400 + spec.seconds) // 60
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, basestring):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
            raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in the format expected by XML Schema."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

    def __cmp__ (self, other):
        """Order time zones by their offset from UTC.

        @param other: another L{UTCOffsetTimeZone}, or any object with
        a C{utcoffset} method; the latter is asked for its offset at
        the current time.

        @return: negative, zero, or positive as this zone's offset is
        less than, equal to, or greater than that of C{other}.
        """
        if isinstance(other, UTCOffsetTimeZone):
            mine = self.__utcOffset_min
            theirs = other.__utcOffset_min
        else:
            # utcoffset returns a timedelta, so compare against our
            # timedelta rather than the integer minute count.
            mine = self.__utcOffset_td
            theirs = other.utcoffset(datetime.datetime.now())
        return (mine > theirs) - (mine < theirs)
930 931
class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} implementation describing the local time zone.

    Offsets are taken from the C{time} module when the class is
    defined.  Whether daylight saving time applies to a particular
    moment is decided by round-tripping that moment through
    C{time.mktime} and C{time.localtime}.

    Largely adapted from the C{datetime.tzinfo} documentation in
    Python 2.5.1.
    """

    # Offset from UTC in standard time; time.timezone counts seconds
    # west of UTC, hence the negation.
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Offset from UTC while DST is in effect; identical to the
    # standard offset when the platform defines no DST zone.
    __DSTOffset = datetime.timedelta(seconds=-time.altzone) if time.daylight else __STDOffset
    __ZeroDelta = datetime.timedelta(0)
    # Amount by which DST shifts the clock relative to standard time.
    __DSTDelta = __DSTOffset - __STDOffset

    def __isDST (self, dt):
        """Return C{True} iff the platform reports DST in effect at C{dt}."""
        # A trailing -1 asks mktime to work out the DST flag itself.
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        return time.localtime(time.mktime(fields)).tm_isdst > 0

    def utcoffset (self, dt):
        """Return the local offset from UTC applicable at C{dt}."""
        return self.__DSTOffset if self.__isDST(dt) else self.__STDOffset

    def dst (self, dt):
        """Return the DST adjustment applicable at C{dt}."""
        return self.__DSTDelta if self.__isDST(dt) else self.__ZeroDelta

    def tzname (self, dt):
        """Return the platform's name for the zone applicable at C{dt}."""
        # The boolean indexes time.tzname: 0 is standard, 1 is DST.
        return time.tzname[self.__isDST(dt)]
964
class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    This class defines a C{__getstate__} method which returns a copy
    of C{self.__dict__} with certain members removed.  Specifically,
    if a string "s" appears in a class member variable named
    C{__PrivateTransient} defined in the "Class" class, then the
    corresponding private variable "_Class__s" will be removed from
    the state dictionary.  This is used to eliminate unnecessary
    fields from instances placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without having
    to implement a C{__getstate__} method in every class in the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    If you use this, it is your responsibility to define the
    C{__PrivateTransient} class variable and add to it the required
    variable names.

    Classes that inherit from this are free to define their own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  If you do this, be sure that the class defining
    C{__getstate__} lists L{PrivateTransient_mixin} as one of its
    direct superclasses, lest the latter end up earlier in the mro and
    consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without transient members.

        The set of names to strip is computed once per class, by
        collecting every C{__PrivateTransient} collection found along
        the class's mro, and is then cached on the class.

        @return: A new C{dict}; C{self.__dict__} is not modified.
        """
        state = self.__dict__.copy()
        cls = self.__class__
        # Note that the aggregate set is stored in a class variable
        # with a slightly different name than the class-level set.
        cache_name = '_%s%s_' % (cls.__name__, self.__Attribute)
        # Consult only this class's own namespace.  getattr() would
        # also find a cache created for an ancestor that happens to
        # share this class's name, and that cache would lack any
        # transient names this class adds.
        skipped = cls.__dict__.get(cache_name)
        if skipped is None:
            skipped = set()
            for ancestor in cls.mro():
                for (k, v) in ancestor.__dict__.items():
                    if k.endswith(self.__Attribute):
                        # k is the mangled "_Class__PrivateTransient";
                        # what precedes the suffix is the "_Class"
                        # prefix of that class's private names.
                        owner_prefix = k[:-len(self.__Attribute)]
                        skipped.update([ '%s__%s' % (owner_prefix, _n) for _n in v ])
            setattr(cls, cache_name, skipped)
        for k in skipped:
            # Entries that are absent, or present with value None, are
            # left as they are.
            if state.get(k) is not None:
                del state[k]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in state.items():
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.resolution.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state
1029
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Collect the files found along a search path that satisfy an
    optional pattern.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile('.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    Returned names are built from the path entries as given; they are
    not otherwise normalized or made absolute.

    @param path: A colon separated list of entries in which the
    search should be performed.  An entry naming a file is tested
    directly, using its full path.  Any other entry is scanned as a
    directory, with each file name tested without its directory part.
    If an entry ends with C{//}, any directory beneath it is scanned
    as well, recursively.

    @keyword pattern: Optional regular expression object whose
    C{search} method decides whether a candidate is returned.  If
    left as C{None}, every candidate is returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a complete entry in the path, is replaced by the
    entries of C{default_path}.  Only the first occurrence is
    expanded; later occurrences are dropped.

    @keyword default_path: A system-defined colon separated path
    which can be restored to the path by placing the
    C{default_path_wildcard} in the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path entry, is removed, and the remainder is
    joined onto C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: A list of matching file paths, in the order discovered.
    """

    def _accepted (candidate):
        # With no pattern everything qualifies.
        return (pattern is None) or (pattern.search(candidate) is not None)

    found = []
    pending = path.split(':')
    while pending:
        entry = pending.pop(0)
        if default_path_wildcard == entry:
            if default_path is not None:
                # Splice the defaults in at the front so they are
                # handled next, then forget them so that a repeated
                # wildcard expands to nothing.
                pending[0:0] = default_path.split(':')
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if _accepted(entry):
                found.append(entry)
            continue
        for (root, _dirs, names) in os.walk(entry):
            found.extend([ os.path.join(root, _n) for _n in names if _accepted(_n) ])
            if not recursive:
                # Only the top directory is wanted.
                break
    return found
1099
class _LocationBase (object):
    """Holder for the base part of a location.

    The wrapped value is probably a string, but might be a uri object
    or the like.  The wrapper exists so that many L{Location}
    instances can refer to one shared object instead of each carrying
    its own copy of the same string.
    """

    __locationBase = None

    def __init__ (self, location_base):
        """Wrap C{location_base}.

        @param location_base: The value to hold.  When it is itself a
        L{_LocationBase}, the value it holds is taken instead, so
        wrappers never nest.
        """
        if isinstance(location_base, _LocationBase):
            self.__locationBase = location_base.locationBase()
        else:
            self.__locationBase = location_base

    def locationBase (self):
        """Return the wrapped value."""
        return self.__locationBase

    def __str__ (self):
        """Return C{str()} of the wrapped value."""
        return str(self.__locationBase)
1118
class Location (object):
    """A position, given as line and column, relative to a location base.

    The base is always held as a L{_LocationBase}, so locations
    derived from one another through L{newLocation} share a single
    base object.
    """

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @keyword location_base: A L{_LocationBase}, or a value to be
        wrapped in one.
        @keyword line_number: Line within the located resource, or C{None}.
        @keyword column_number: Column within that line, or C{None}.
        """
        if not isinstance(location_base, _LocationBase):
            location_base = _LocationBase(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create a location that shares this one's base.

        @keyword locator: Optional object providing C{getLineNumber}
        and C{getColumnNumber} methods (for example a SAX locator).
        When given, its values replace C{line_number} and
        C{column_number}.
        @keyword line_number: Line to record when no locator is usable.
        @keyword column_number: Column to record when no locator is usable.
        @return: A new L{Location}.
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: a locator that cannot report a position
                # leaves whatever values are already in hand.
                # Interpreter-level exits such as KeyboardInterrupt
                # are deliberately not swallowed.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __str__ (self):
        """Return C{base[line:column]}, or C{<unknownLocation>} when
        there is no base."""
        base = self.locationBase
        # The constructor always wraps the base, so the wrapper itself
        # is never None after construction; what marks an unknown
        # location is the value held inside the wrapper.
        if (base is None) or (base.locationBase() is None):
            return '<unknownLocation>'
        return '%s[%s:%s]' % (base, self.lineNumber, self.columnNumber)
1148
class Locatable_mixin (pyxb.cscRoot):
    """Mixin giving an object a private, optional location.

    The location may be supplied to the constructor through the
    C{location} keyword, and may be replaced later with
    L{_setLocation}.
    """

    __location = None

    def __init__ (self, *args, **kw):
        """Record the C{location} keyword, if any, and continue
        cooperative initialization.

        The C{location} keyword is removed from C{kw} before the
        remaining arguments are passed up the mro.
        """
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _location (self):
        """Return the recorded location, or C{None} if there is none."""
        return self.__location

    def _setLocation (self, location):
        """Replace the recorded location with C{location}."""
        self.__location = location
1161