pyxb.utils.utility

1 # -*- coding: utf-8 -*- 2 # Copyright 2009-2013, Peter A. Bigot 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); you may 5 # not use this file except in compliance with the License. You may obtain a 6 # copy of the License at: 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 # License for the specific language governing permissions and limitations 14 # under the License. 15 16 """Utility functions and classes.""" 17 18 import re 19 import os 20 import errno 21 import pyxb 22 import urlparse 23 import time 24 import datetime 25 import logging 26 27 _log = logging.getLogger(__name__)

def BackfillComparisons (cls):
    """Class decorator that installs the derived rich-comparison methods.

    The idea follows C{functools.total_ordering} from Python 2.7.5,
    with two differences: the decorated class must itself supply
    C{__eq__} and C{__lt__}, and C{__ne__}, C{__gt__}, C{__le__}, and
    C{__ge__} are always replaced by versions built from those two,
    even if the class already defines them.

    Python 3 still needs this: there C{x >= y} is resolved through the
    C{__ge__} inherited from C{object}, which does not cope with
    operands of different types even when the underlying C{y < x}
    would convert C{x} into something compatible.

    @param cls: the class to augment; it is modified in place.

    @return: C{cls}
    """

    # Installed in this order: the three ordering operators first,
    # then inequality.
    derived = (
        ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))),
        ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)),
        ('__ge__', lambda self, other: not self.__lt__(other)),
        ('__ne__', lambda self, other: not self.__eq__(other)),
        )
    for (method_name, method) in derived:
        # Give the synthesized method the name and documentation of
        # the corresponding built-in operator on int.
        method.__name__ = method_name
        method.__doc__ = getattr(int, method_name).__doc__
        setattr(cls, method_name, method)
    return cls

def IteratedCompareMixed (lhs, rhs):
    """Tuple comparison that permits C{None} as lower than any value.

    The two sequences are compared element by element.  At the first
    position where they differ the result is decided: C{None} sorts
    below any other value, otherwise the elements' own C{==} and C{<}
    are used.  If one sequence is a prefix of the other, the shorter
    one sorts first.

    @param lhs: a sized, iterable sequence (C{len} is applied to it)
    @param rhs: a sized, iterable sequence (C{len} is applied to it)

    @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
    # zip() stops at the shorter sequence, which is exactly the point
    # where the original iterator-based walk ran out of pairs; unlike
    # the iterator .next() method it works on Python 2 and Python 3.
    for (lv, rv) in zip(lhs, rhs):
        if lv is None:
            if rv is None:
                continue
            return -1
        if rv is None:
            return 1
        if lv == rv:
            continue
        if lv < rv:
            return -1
        return 1
    # All paired elements matched: order by length.
    nl = len(lhs)
    nr = len(rhs)
    if nl < nr:
        return -1
    if nl == nr:
        return 0
    return 1

87

def QuotedEscaped (s):
    """Produce a Python source literal for a string.

    The built-in C{repr} already yields a quoted, escaped literal, so
    it is used directly.

    @param s: the value to convert

    @rtype: C{str}
    """
    literal = repr(s)
    return literal

97

def _DefaultXMLIdentifierToPython (identifier):
    """Default implementation for _XMLIdentifierToPython

    For historical reasons, this converts the identifier from a str to
    unicode in the system default encoding.  This should have no
    practical effect.

    @param identifier : some XML identifier

    @return: C{unicode(identifier)}; on an interpreter that has no
    C{unicode} builtin (Python 3), C{str(identifier)}, which is the
    Unicode text type there.
    """
    try:
        text_type = unicode
    except NameError:
        # Python 3 has no separate unicode type.
        text_type = str
    return text_type(identifier)

111

def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Install the callable L{MakeIdentifier} uses to pre-process an
    XML identifier.

    In Python 3 identifiers can be full Unicode tokens, but in Python
    2 all identifiers must be ASCII.  L{MakeIdentifier} enforces this
    by removing every character that is not valid in an identifier.

    An application generating bindings may prefer to transliterate
    such characters rather than lose them.  The callable installed
    here runs before the invalid characters are stripped, so it can
    perform that translation.

    The callable only has to replace the characters it cares about;
    L{MakeIdentifier} still applies all of its own transformations
    afterwards, so the final result is a legal identifier.

    @param xml_identifier_to_python : A callable that takes a string
    and returns a Unicode string, possibly with non-identifier
    characters replaced by other characters.  Pass C{None} to restore
    the default implementation, L{_DefaultXMLIdentifierToPython}.
    """
    global _XMLIdentifierToPython
    if xml_identifier_to_python is not None:
        _XMLIdentifierToPython = xml_identifier_to_python
    else:
        _XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# The pre-processing hook applied by MakeIdentifier before any other
# transformation.  It can be replaced with _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Characters that become an underscore: dash, space, and period.
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Any character other than an ASCII letter, an ASCII digit, or underscore.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# A run of one or more underscores at the start of the string.
_PrefixUnderscore_re = re.compile(r'^_+')
# A run of one or more digits at the start of the string.
_PrefixDigit_re = re.compile(r'^\d+')
# An underscore together with the single word character following it.
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Turn a string into something usable as a Python identifier.

    The steps, in order:

     - the string is passed through L{_XMLIdentifierToPython};
     - dashes, spaces, and periods become underscores;
     - every character that is not an ASCII letter, digit, or
       underscore is removed;
     - leading underscores are removed;
     - optionally, the result is camel-cased (see C{camel_case});
     - if the result starts with a digit, C{'n'} is prepended;
     - if the result is empty, C{'emptyString'} is used instead.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, each underscore that is
    immediately followed by a word character is replaced by the
    upper-cased version of that character, so 'one_or_two' becomes
    'oneOrTwo'.  If C{False} (default), has no effect.

    @rtype: C{str}
    """
    ident = _XMLIdentifierToPython(s)
    ident = _UnderscoreSubstitute_re.sub('_', ident)
    ident = _NonIdentifier_re.sub('', ident)
    ident = _PrefixUnderscore_re.sub('', ident)
    if camel_case:
        ident = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), ident)
    if _PrefixDigit_re.match(ident):
        ident = 'n' + ident
    if not ident:
        ident = 'emptyString'
    return ident

_PythonKeywords = frozenset([
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield",
    ])
"""The Python language keywords.  Type names such as int and float
are not keywords and are not listed.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset([
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    # "set" is neither a keyword nor a constant, but if some fool
    # like {http://www.w3.org/2001/SMIL20/}set gets defined there's
    # no way to access the builtin constructor.
    "set",
    ])
"""Symbols that are not keywords but still must not be used as
identifiers.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = _PythonKeywords | _PythonBuiltInConstants
"""Everything reserved for Python: the union of L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Append an underscore to C{s} if it collides with a reserved word.

    A collision is a match against the Python keywords and built-in
    constants in L{_Keywords}, or against C{aux_keywords}.  Any other
    string is returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional container of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    is_reserved = (s in _Keywords) or (s in aux_keywords)
    if not is_reserved:
        return s
    return '%s_' % (s,)

225

def MakeUnique (s, in_use):
    """Derive from C{s} an identifier that is not yet in C{in_use}.

    If C{s} itself is free it is used as is.  Otherwise trailing
    underscores are removed from C{s} and candidates are tried in the
    order C{x_}, C{x_2}, C{x_3}, ... until a free one is found.

    @param s: the preferred identifier

    @param in_use: The set of identifiers already in use in the
    relevant scope.  C{in_use} is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    if s not in in_use:
        in_use.add(s)
        return s
    stem = s.rstrip('_')
    unique = '%s_' % (stem,)
    serial = 2
    while unique in in_use:
        unique = '%s_%d' % (stem, serial)
        serial += 1
    in_use.add(unique)
    return unique

250

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Run the full pipeline that produces a unique identifier.

    The string is converted with L{MakeIdentifier}, leading and
    trailing underscores are stripped, keyword conflicts are resolved
    with L{DeconflictKeyword}, the requested visibility prefix is
    added, and the result is made unique with L{MakeUnique}.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{True}, the returned identifier starts with
    two underscores, making it a private variable within a Python
    class.  If C{False} (default) no such prefix is added.  Takes
    precedence over C{protected}.

    @keyword protected: as for C{private}, but uses only one
    underscore.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    base = MakeIdentifier(s).strip('_')
    name = DeconflictKeyword(base, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + name, in_use)

286

# @todo: descend from pyxb.cscRoot, if we import pyxb
class _DeconflictSymbols_mixin (object):
    """Mix-in used to deconflict public symbols in classes that may be
    inherited by generated binding classes.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  It is possible that an XML schema might
    include tags and attribute names that match these symbols.  To
    avoid conflict, the reserved symbols marked in this class are
    added to the pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no leading underscore) are
    currently supported.  (Private symbols can't be deconflicted that
    easily, and no protected symbols that derive from the XML are
    created by the binding generator.)
    """

    # The base class reserves nothing; subclasses replace this with
    # their own set as described in the class documentation.
    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting runs of spaces.  It also matches a
# single space, which is harmlessly replaced by itself.
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Normalize the given string.

    Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
    parameters must be assigned the value C{True} by the caller.  If
    more than one is set, the first of C{preserve}, C{replace},
    C{collapse} wins.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: all tabs, newlines, and carriage returns are
     replaced with ASCII spaces.

     - C{collapse}: the C{replace} normalization is done, then
     sequences of two or more spaces are replaced by a single space,
     and spaces at the start and end of the text are removed.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @param text: the string to normalize

    @raise Exception: if none of the three keywords is true.

    @rtype: C{str}
    """
    if preserve:
        return text
    text = __TabCRLF_re.sub(' ', text)
    if replace:
        return text
    if collapse:
        # Only #x20 is removed from the ends.  A bare strip() would
        # also remove characters that are not XML whitespace, such as
        # form feed or U+00A0, which the facet requires to be kept.
        return __MultiSpace_re.sub(' ', text).strip(' ')
    # pyxb not imported here; could be.
    raise Exception('NormalizeWhitespace: No normalization specified')

352

class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes are kept in sets and used as dictionary keys, so they must
    be hashable.
    """

    # Class-level defaults for cached results; each is C{None} until
    # the corresponding computation has been done on the instance.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: If not C{None}, a node that is recorded as the
        (only) root of the graph and added to the graph's nodes.
        """
        self.__roots = None
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()
        if root is not None:
            self.__roots = set([root])
            # Keep the node set consistent with the root set, as
            # addRoot does; otherwise a root that never appears in an
            # edge is walked by dfsOrder but missing from nodes().
            self.__nodes.add(root)

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.  An edge from a
        node to itself is recorded, but does not count as an incoming
        edge when roots are calculated.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any roots that had been designated manually
        (through L{addRoot} or the constructor) are forgotten unless
        they also have no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The search starts from the nodes returned by L{roots}, so only
        nodes reachable from a root are assigned to a component.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False}, an existing previous result is
        left unchanged.

        @raise Exception: if the graph has nodes but no roots."""

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Unvisited: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__stack:
                # Still on the stack, so part of the current component.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v heads a component: everything above it on the stack,
            # and v itself, belongs to that component.
            scc = []
            while True:
                scc.append(self.__stack.pop())
                if v == scc[-1]:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph that
        contain more than one node.

        The data structure is a list, each element of which is itself
        a list of the nodes in one component.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Each
        component is a list of nodes.  Appearance of a node in a
        component earlier in the list indicates that it has no
        dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        Computed as edges - nodes + 2 * (number of multi-node
        strongly-connected components)."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        """Post-order depth-first walk from C{source}, appending each
        node to the DFS order after everything reachable from it."""
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a Graphviz DOT description of the graph as a string.

        @keyword title: the name given to the digraph
        @keyword labeller: optional callable mapping a node to its
        label; C{str} is used if it is C{None}.  Edges from a node to
        itself are omitted.
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and recompute the order.

        @raise Exception: if the walk from the roots did not reach
        every node in the graph."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

    def rootSetOrder (self):
        """Return the nodes of the graph as a sequence of sets, in
        dependency order.

        The first set holds the nodes that have no outgoing edges,
        i.e. that require nothing.  Each following set holds the nodes
        all of whose outgoing edges lead to nodes in earlier sets.
        This continues until all nodes have been placed.  The sets
        impose a partial order on the nodes, without being as
        constraining as L{sccOrder}.

        @return: a list of the sets, or C{None} (after logging an
        error) if a dependency cycle prevents completing the order."""
        order = []
        nodes = set(self.__nodes)
        # Work on a copy so the graph's own edge map is not consumed.
        edge_map = {}
        for (src, targets) in self.__edgeMap.items():
            edge_map[src] = targets.copy()
        while nodes:
            freeset = set()
            for n in nodes:
                if not (n in edge_map):
                    freeset.add(n)
            if 0 == len(freeset):
                _log.error('dependency cycle in named components')
                return None
            order.append(freeset)
            nodes.difference_update(freeset)
            # Drop the satisfied dependencies; a node whose remaining
            # dependency set is empty becomes free on the next pass.
            new_edge_map = {}
            for (src, targets) in edge_map.items():
                targets.difference_update(freeset)
                if 0 != len(targets):
                    new_edge_map[src] = targets
            edge_map = new_edge_map
        return order

# The prefix rewrite map used by NormalizeLocation when the caller
# does not supply one.  It is always updated in place, so references
# to this dictionary stay valid.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Replace the contents of the map used by L{NormalizeLocation}
    to rewrite URI prefixes.

    @param prefix_map: a mapping (or anything else accepted by
    C{dict.update}) from a URI prefix to the string that replaces it.
    """
    rewrite_map = LocationPrefixRewriteMap_
    rewrite_map.clear()
    rewrite_map.update(prefix_map)

668

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    If no URI schema is present, this will normalize a file system
    path.

    Optionally, the resulting absolute URI can subsequently be
    rewritten to replace specified prefix strings with alternative
    strings, e.g. to convert a remote URI to a local repository.  This
    rewriting is done after the conversion to an absolute URI, but
    before normalizing file system URIs.  Every prefix in the map that
    matches the URI as rewritten so far is applied.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    @return: the normalized URI; a value without a colon is treated as
    a file system path and passed through C{os.path.realpath}.
    """
    if uri is None:
        return uri
    if parent_uri is None:
        abs_uri = uri
    else:
        abs_uri = urlparse.urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    # items() rather than iteritems(): available on both Python 2 and
    # Python 3 dictionaries.
    for (pfx, sub) in prefix_map.items():
        if abs_uri.startswith(pfx):
            abs_uri = sub + abs_uri[len(pfx):]
    if ':' not in abs_uri:
        # No scheme: a local file path.
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

709

def DataFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as raw data.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    @param uri: the URI or file path to read

    @keyword archive_directory: If set, and the data was not read
    from a local file, a copy of the data is written into this
    directory under the base name of the URI path (with a numeric
    suffix if that name is taken).  Failure to write the copy is
    logged and otherwise ignored.

    @return: the data read from the URI

    @raise Exception: whatever exception prevented the URI from being
    opened; it is logged before being raised.
    """
    import urllib
    import urllib2
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urllib2.urlopen(uri)
        except Exception as e:
            exc = e
        if stream is None:
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the exception already recorded from urllib2.
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri, 'rb')
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    try:
        # Protect this in case whatever stream is doesn't have an fp
        # attribute.  Data that came from a local file is not archived.
        if isinstance(stream, file) or isinstance(stream.fp, file):
            archive_directory = None
    except Exception:
        pass
    try:
        xmld = stream.read()
    finally:
        # Release the file or connection as soon as the data is in hand.
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmld)
            finally:
                archive_fp.close()
        except (OSError, IOError) as e:
            # Archiving is best-effort: on Python 2 a failed open or
            # write raises IOError rather than OSError.
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmld

766

def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories in the path to the file are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path of the file to open

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8) causes an C{OSError} to be raised with
    C{errno} set to C{EEXIST}.  I.e., only files with this value in
    the first 4KB will be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: if the tag check fails, or if a directory in the
    path cannot be created for a reason other than already existing.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            fp.seek(0, os.SEEK_SET)
            # The 4096-byte window may end in the middle of a
            # multi-byte character; that must not abort the check.
            blockt = fp.read(4096).decode('utf-8', 'replace')
            if 0 > blockt.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0, os.SEEK_SET)
            fp.truncate()
        else:
            # Offset zero relative to the end; seek(2) would move to
            # the third byte of the file instead.
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    @param text: the text to hash, as a byte string or a Unicode string

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    try:
        text_type = unicode
    except NameError:
        # Python 3: str is the Unicode type and must be encoded
        # before it can be hashed.
        text_type = str
    if isinstance(text, text_type):
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()

# uuid didn't show up until 2.5
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import random

def _NewUUIDString ():
    """Obtain a UUID using the best available method.  On a version of
    python that does not incorporate the C{uuid} class, this creates a
    string combining the current date and time (to the second) with a
    random number.

    @return: the URN form of a C{uuid.uuid1} value when the C{uuid}
    module is available; otherwise a timestamp, a colon, and eight
    hexadecimal digits.

    @rtype: C{str}
    """
    if __HaveUUID:
        return uuid.uuid1().urn
    # No long-integer suffix on the bound: it is unnecessary on
    # Python 2 and a syntax error on Python 3.
    return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))

851

852 -class UniqueIdentifier (object):

853 """Records a unique identifier, generally associated with a 854 binding generation action. 855 856 The identifier is a string, but gets wrapped in an instance of 857 this class to optimize comparisons and reduce memory footprint. 858 859 Invoking the constructor for this class on the same string 860 multiple times will return the same Python object. 861 862 An instance of this class compares equal to, and hashes equivalent 863 to, the uid string. When C{str}'d, the result is the uid; when 864 C{repr}'d, the result is a constructor call to 865 C{pyxb.utils.utility.UniqueIdentifier}. 866 """ 867 868 # A map from UID string to the instance that represents it 869 __ExistingUIDs = {} 870

871 - def uid (self):

872 """The string unique identifier""" 873 return self.__uid

874 __uid = None 875 876 # Support pickling, which is done using only the UID.

877 - def __getnewargs__ (self):

878 return (self.__uid,)

879

880 - def __getstate__ (self):

881 return self.__uid

882

883 - def __setstate__ (self, state):

884 assert self.__uid == state

885 886 # Singleton-like

887 - def __new__ (cls, *args):

888 if 0 == len(args): 889 uid = _NewUUIDString() 890 else: 891 uid = args[0] 892 if isinstance(uid, UniqueIdentifier): 893 uid = uid.uid() 894 if not isinstance(uid, basestring): 895 raise TypeError('UniqueIdentifier uid must be a string') 896 rv = cls.__ExistingUIDs.get(uid) 897 if rv is None: 898 rv = super(UniqueIdentifier, cls).__new__(cls) 899 rv.__uid = uid 900 cls.__ExistingUIDs[uid] = rv 901 return rv

902

903 - def associateObject (self, obj):

904 """Associate the given object witth this identifier. 905 906 This is a one-way association: the object is not provided with 907 a return path to this identifier instance.""" 908 self.__associatedObjects.add(obj)

909 - def associatedObjects (self):

910 """The set of objects that have been associated with this 911 identifier instance.""" 912 return self.__associatedObjects

913 __associatedObjects = None 914

915 - def __init__ (self, uid=None):

916 """Create a new UniqueIdentifier instance. 917 918 @param uid: The unique identifier string. If present, it is 919 the callers responsibility to ensure the value is universally 920 unique. If C{None}, one will be provided. 921 @type uid: C{str} or C{unicode} 922 """ 923 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid()) 924 self.__associatedObjects = set()

925

926 - def __eq__ (self, other):

927 if other is None: 928 return False 929 elif isinstance(other, UniqueIdentifier): 930 other_uid = other.uid() 931 elif isinstance(other, basestring): 932 other_uid = other 933 else: 934 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),)) 935 return self.uid() == other_uid

936

937 - def __hash__ (self):

938 return hash(self.uid())

939

940 - def __str__ (self):

941 return self.uid()

942

def __repr__ (self):
    """Return a constructor-style expression naming this class with
    the C{repr} of the uid string as its argument."""
    uid_text = repr(self.uid())
    return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (uid_text,)

945

@BackfillComparisons
class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    # Range limits
    __MaxOffset_td = datetime.timedelta(hours=14)

    @staticmethod
    def __MinutesEast (delta):
        """Return the signed number of whole minutes in a
        C{datetime.timedelta}, rounded toward negative infinity.

        A negative timedelta is stored with a negative C{days} and a
        non-negative C{seconds}, so both fields have to be combined."""
        return (delta.days * 86400 + delta.seconds) // 60

    def __init__ (self, spec=None):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta} (negative values
        are west of UTC), the value C{None} (equal to 0 minutes
        east), or a string that conforms to the ISO8601 time zone
        sequence (B{Z}, or B{[+-]HH:MM}).

        @raise ValueError: a string C{spec} is not a valid time zone
        sequence, or the offset is more than 14 hours from UTC.
        @raise TypeError: C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, basestring):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, datetime.timedelta):
                # Using spec.seconds alone would drop the days field
                # and turn e.g. -05:00 into +19:00.
                self.__utcOffset_min = self.__MinutesEast(spec)
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
            raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in the format expected by XML Schema."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

    def __otherForComparison (self, other):
        """Return the value against which this zone's offset in
        minutes is compared.

        For another L{UTCOffsetTimeZone} this is its offset in
        minutes.  For anything else, C{other.utcoffset} is evaluated
        at the current local time; a C{datetime.timedelta} result is
        converted to minutes so both sides use the same unit, and any
        other result (such as C{None}) is passed through unchanged.
        """
        if isinstance(other, UTCOffsetTimeZone):
            return other.__utcOffset_min
        offset = other.utcoffset(datetime.datetime.now())
        if isinstance(offset, datetime.timedelta):
            return self.__MinutesEast(offset)
        return offset

    def __hash__ (self):
        """Hash by the offset in minutes east of UTC."""
        return hash(self.__utcOffset_min)

    def __eq__ (self, other):
        """Zones are equal when their offsets in minutes are equal."""
        return self.__utcOffset_min == self.__otherForComparison(other)

    def __lt__ (self, other):
        """Zones are ordered by their offsets in minutes east of UTC."""
        return self.__utcOffset_min < self.__otherForComparison(other)

1030

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass for the local time zone.

    Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
    The offsets are taken from the C{time} module when the class is
    defined.
    """

    # Offset from UTC while standard time is in effect
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Offset from UTC while daylight saving time is in effect; the
    # same as the standard offset when time.daylight is false
    __DSTOffset = datetime.timedelta(seconds=-time.altzone) if time.daylight else __STDOffset
    __ZeroDelta = datetime.timedelta(0)
    # The adjustment daylight saving time makes to the standard offset
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the offset from UTC in effect at C{dt}."""
        return self.__DSTOffset if self.__isDST(dt) else self.__STDOffset

    def dst (self, dt):
        """Return the daylight saving adjustment in effect at C{dt};
        a zero duration when it is not in effect."""
        return self.__DSTDelta if self.__isDST(dt) else self.__ZeroDelta

    def tzname (self, dt):
        """Return the entry of C{time.tzname} that applies at C{dt}."""
        in_dst = self.__isDST(dt)
        return time.tzname[in_dst]

    def __isDST (self, dt):
        """Return C{True} iff the platform reports daylight saving
        time for the local date and time held in C{dt}."""
        # The trailing -1 asks mktime to decide for itself whether
        # daylight saving time applies.
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        stamp = time.mktime(fields)
        return time.localtime(stamp).tm_isdst > 0

1063

class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    This class defines a C{__getstate__} method which returns a copy
    of C{self.__dict__} with certain members removed.  Specifically,
    if a string "s" appears in a class member variable named
    C{__PrivateTransient} defined in the "Class" class, then the
    corresponding private variable "_Class__s" will be removed from
    the state dictionary.  This is used to eliminate unnecessary
    fields from instances placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without having
    to implement a C{__getstate__} method in every class in the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    If you use this, it is your responsibility to define the
    C{__PrivateTransient} class variable and add to it the required
    variable names.

    Classes that inherit from this are free to define their own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  If you do this, be sure that the class defining
    C{__getstate__} lists L{PrivateTransient_mixin} as one of its
    direct superclasses, lest the latter end up earlier in the mro and
    consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the
        members declared transient by any class in the mro.

        The set of names to drop is computed once per class and
        cached on the class itself.  A listed member whose value is
        C{None}, or which is absent, is left as it is."""
        klass = self.__class__
        suffix = self.__Attribute
        # Note that the aggregate set is stored in a class variable
        # with a slightly different name than the class-level set.
        cache_name = '_%s%s_' % (klass.__name__, suffix)
        transient = getattr(klass, cache_name, None)
        if transient is None:
            transient = set()
            for ancestor in klass.mro():
                for (member, names) in ancestor.__dict__.iteritems():
                    if not member.endswith(suffix):
                        continue
                    # What precedes the suffix is the "_Class" part of
                    # the mangled private names.
                    owner = member[:-len(suffix)]
                    transient.update([ '%s__%s' % (owner, _n) for _n in names ])
            setattr(klass, cache_name, transient)
        state = self.__dict__.copy()
        for key in transient:
            if state.get(key) is not None:
                del state[key]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in state.iteritems():
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.resolution.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state

1126

def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile('.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a file rather than a directory
    is itself a candidate.

    @keyword pattern: Optional regular expression object used to
    determine whether a given candidate should be returned.  It is
    searched in the file name for files found in a directory, and in
    the whole entry for an entry that names a file.  If left as
    C{None}, all candidates are returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single entry in the path, is replaced by the value
    of C{default_path}.  Only the first occurrence is replaced; later
    ones are dropped.

    @keyword default_path: A system-defined directory list which can
    be restored to the path by placing the C{default_path_wildcard}
    in the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: The list of matching file paths, in the order found.
    """
    def accepts (name):
        return (pattern is None) or (pattern.search(name) is not None)

    found = []
    pending = path.split(os.pathsep)
    while pending:
        entry = pending.pop(0)
        if default_path_wildcard == entry:
            # Splice the default path in at the front; clearing it
            # afterwards makes any later wildcard a no-op.
            if default_path is not None:
                pending[0:0] = default_path.split(os.pathsep)
            default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if accepts(entry):
                found.append(entry)
            continue
        for (root, _subdirs, names) in os.walk(entry):
            found.extend([ os.path.join(root, _n) for _n in names if accepts(_n) ])
            if not recursive:
                # Only the top directory is wanted
                break
    return found

1199

@BackfillComparisons
class Location (object):
    """A position expressed as a base (expected to be a URI), a line
    number, and a column number, any of which may be C{None}.

    Instances are hashable, and are ordered by base, then line
    number, then column number.  Within a field, C{None} sorts after
    every other value."""

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @param location_base: The base of the location; a C{str}
        value is interned.
        @param line_number: The line number, or C{None}.
        @param column_number: The column number, or C{None}.
        """
        if isinstance(location_base, str):
            location_base = intern(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create a new location sharing this instance's base.

        @param locator: Optional object providing C{getLineNumber()}
        and C{getColumnNumber()}.  When given, the values it reports
        replace C{line_number} and C{column_number}.
        @param line_number: Line number used when no locator supplies one.
        @param column_number: Column number used when no locator supplies one.
        @return: A new L{Location}.
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: keep whatever has been determined so
                # far.  Unlike a bare except, this does not swallow
                # KeyboardInterrupt or SystemExit.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __cmpSingleUnlessNone (self, v1, v2):
        """Three-way comparison of one field.

        @return: C{None} if both values are C{None}; otherwise -1, 0,
        or 1, where a C{None} value compares greater than any
        non-C{None} value."""
        if v1 is None:
            if v2 is None:
                return None
            return 1
        if v2 is None:
            return -1
        if v1 < v2:
            return -1
        if v1 == v2:
            return 0
        return 1

    def __cmpTupleUnlessNone (self, v1, v2):
        """Three-way comparison of two locations.

        The base is compared first.  The line number is consulted only
        if both bases are C{None}, and the column number only if both
        line numbers are also C{None}.

        @return: C{None} if every field is C{None} in both locations;
        otherwise -1, 0, or 1."""
        rv = self.__cmpSingleUnlessNone(v1.__locationBase, v2.__locationBase)
        if rv is None:
            rv = self.__cmpSingleUnlessNone(v1.__lineNumber, v2.__lineNumber)
        if rv is None:
            rv = self.__cmpSingleUnlessNone(v1.__columnNumber, v2.__columnNumber)
        return rv

    def __hash__ (self):
        """Hash on the (base, line number, column number) tuple."""
        return hash((self.__locationBase, self.__lineNumber, self.__columnNumber))

    def __eq__ (self, other):
        """Comparison by locationBase, then lineNumber, then columnNumber.

        Anything that is not a L{Location}, including C{None},
        compares unequal instead of failing on a missing attribute."""
        if not isinstance(other, Location):
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return True
        return 0 == rv

    def __lt__ (self, other):
        """Ordering by locationBase, then lineNumber, then columnNumber.

        @param other: A L{Location}, or C{None} (for which the result
        is C{False})."""
        if other is None:
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return False
        return -1 == rv

    def __str__ (self):
        """Return the last path component of the base (or
        C{<unknown>}) followed by C{[line:column]}."""
        if self.locationBase is None:
            lb = '<unknown>'
        else:
            # No, this should not be os.sep.  The location is
            # expected to be a URI.
            lb = self.locationBase.rsplit('/', 1)[-1]
        return '%s[%s:%s]' % (lb, self.lineNumber, self.columnNumber)

    def __repr__ (self):
        """Return a constructor-style expression for this location."""
        t = type(self)
        ctor = '%s.%s' % (t.__module__, t.__name__)
        return '%s(%r, %r, %r)' % (ctor, self.__locationBase, self.__lineNumber, self.__columnNumber)

1280

class Locatable_mixin (pyxb.cscRoot):
    """Mix-in giving an instance a private location value, with
    accessor methods to replace and retrieve it.

    The value is stored as provided; presumably it is a L{Location},
    but nothing here requires that."""

    # The location; None until one is supplied
    __location = None

    def __init__ (self, *args, **kw):
        """Remove the C{location} keyword, if present, and record it
        before passing the remaining arguments up the class hierarchy.

        @keyword location: The location to record; defaults to C{None}.
        """
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the recorded location."""
        self.__location = location

    def _location (self):
        """Return the recorded location, or C{None} if there is none."""
        current = self.__location
        return current

1293

Source Code for Module pyxb.utils.utility