pyxb.utils.utility

1 # -*- coding: utf-8 -*- 2 # Copyright 2009-2013, Peter A. Bigot 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); you may 5 # not use this file except in compliance with the License. You may obtain a 6 # copy of the License at: 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 # License for the specific language governing permissions and limitations 14 # under the License. 15 16 """Utility functions and classes.""" 17 18 import re 19 import os 20 import errno 21 import pyxb 22 from pyxb.utils.six.moves.urllib import parse as urlparse 23 import time 24 import datetime 25 import logging 26 from pyxb.utils import six 27 28 _log = logging.getLogger(__name__)

class Object (object):
    """An empty attribute container.

    Instances exist only so that arbitrary attributes can be attached
    to them, giving map-like storage without having to convert names
    into text keys.
    """

37

def BackfillComparisons (cls):
    """Class decorator that fills in missing ordering methods.

    Concept derived from Python 2.7.5 functools.total_ordering, but
    this version requires that C{__eq__} and C{__lt__} be provided,
    and unconditionally overrides C{__ne__}, C{__gt__}, C{__le__}, and
    C{__ge__} with versions derived from those two.

    This is still necessary in Python 3 because there the comparison
    C{x >= y} is done by the C{__ge__} inherited from C{object}, which
    does not handle the case where C{x} and C{y} are not the same type
    even if the underlying C{y < x} would convert C{x} to be
    compatible.

    If the class's C{__lt__} or C{__eq__} returns C{NotImplemented},
    the derived method returns C{NotImplemented} as well, so Python
    can try the reflected operation instead of the result being
    silently coerced to a boolean.

    @param cls: the class to be augmented; it is modified in place.

    @return: C{cls}
    """

    def applyconvert (cls, derived):
        # Install each derived operation under its special-method name,
        # borrowing the documentation of the corresponding int method.
        for (opn, opx) in derived:
            opx.__name__ = opn
            opx.__doc__ = getattr(int, opn).__doc__
            setattr(cls, opn, opx)

    def derived_gt (self, other):
        lt = self.__lt__(other)
        if lt is NotImplemented:
            return lt
        if lt:
            return False
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return eq
        return not eq

    def derived_le (self, other):
        lt = self.__lt__(other)
        if lt is NotImplemented:
            return lt
        return lt or self.__eq__(other)

    def derived_ge (self, other):
        lt = self.__lt__(other)
        if lt is NotImplemented:
            return lt
        return not lt

    def derived_ne (self, other):
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return eq
        return not eq

    applyconvert(cls, (
            ('__gt__', derived_gt),
            ('__le__', derived_le),
            ('__ge__', derived_ge)
            ))
    applyconvert(cls, (
            ('__ne__', derived_ne),
            ))
    return cls

def IteratedCompareMixed (lhs, rhs):
    """Compare two sequences element by element, ordering C{None}
    below every other value.

    Corresponding elements are examined in order.  The first pair
    that differs decides the result; a C{None} element sorts before
    any non-C{None} element.  If one sequence is a prefix of the
    other, the shorter sequence sorts first.

    @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
    for (left, right) in zip(lhs, rhs):
        if (left is None) and (right is None):
            continue
        if left is None:
            return -1
        if right is None:
            return 1
        if left == right:
            continue
        if left < right:
            return -1
        return 1
    # Every shared position matched, so the lengths decide.
    left_len = len(lhs)
    right_len = len(rhs)
    if left_len == right_len:
        return 0
    if left_len < right_len:
        return -1
    return 1

96

def QuotedEscaped (s):
    """Produce a literal for C{s} that can be embedded in Python source.

    The built-in C{repr} already yields exactly what is needed, so the
    work is delegated to it.

    @rtype: C{str}
    """
    literal = repr(s)
    return literal

106

def _DefaultXMLIdentifierToPython (identifier):
    """Default implementation for _XMLIdentifierToPython.

    For historical reasons the identifier is passed through
    C{six.text_type}, i.e. converted to the text (unicode) type.
    This should have no practical effect.

    @param identifier : some XML identifier

    @return: C{six.text_type(identifier)}
    """
    as_text = six.text_type(identifier)
    return as_text

120

def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Configure the callable L{MakeIdentifier} uses to pre-process an
    XML identifier.

    In Python 3, identifiers can be full Unicode tokens, but in
    Python 2 all identifiers must be ASCII characters.
    L{MakeIdentifier} enforces this by removing all characters that
    are not valid within an identifier.

    In some cases, an application generating bindings may be able to
    transliterate Unicode code points that are not valid Python
    identifier characters into something else.  The callable assigned
    here performs that translation before the invalid characters are
    stripped.

    The callable is only responsible for replacing whatever characters
    it wishes to.  All transformations performed by L{MakeIdentifier}
    are still applied afterwards, to ensure the output is in fact a
    legal identifier.

    @param xml_identifier_to_python : A callable that takes a string
    and returns a Unicode string, possibly with non-identifier
    characters replaced by other characters.  Pass C{None} to reset to
    the default implementation, which is
    L{_DefaultXMLIdentifierToPython}.

    @return: C{None}; the module-level C{_XMLIdentifierToPython} is
    rebound as a side effect.
    """
    global _XMLIdentifierToPython
    _XMLIdentifierToPython = (
        _DefaultXMLIdentifierToPython
        if xml_identifier_to_python is None
        else xml_identifier_to_python)

# The callable applied to every XML identifier before it is cleaned
# up; rebound by _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Characters that are converted to an underscore
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Characters that cannot appear in an (ASCII) identifier
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# A run of underscores at the start of the string
_PrefixUnderscore_re = re.compile(r'^_+')
# A run of digits at the start of the string
_PrefixDigit_re = re.compile(r'^\d+')
# An underscore together with the word character that follows it
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Convert a string into something suitable to be a Python identifier.

    The string is first processed by L{_XMLIdentifierToPython}.  Then
    dashes, spaces, and periods are replaced by underscores,
    characters not permitted in Python identifiers are stripped, and
    any leading underscores are removed.  If the result begins with a
    digit, the character 'n' is prepended.  If the result is the empty
    string, the string 'emptyString' is substituted.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, any underscore in the result
    string that is immediately followed by an alphanumeric is replaced
    by the capitalized version of that alphanumeric.  Thus,
    'one_or_two' becomes 'oneOrTwo'.  If C{False} (default), has no
    effect.

    @rtype: C{str}
    """
    ident = _XMLIdentifierToPython(s)
    ident = _UnderscoreSubstitute_re.sub('_', ident)
    ident = _NonIdentifier_re.sub('', ident)
    ident = _PrefixUnderscore_re.sub('', ident)
    if camel_case:
        def _capitalize_follower (match):
            # Drop the underscore, upper-case the character after it.
            return match.group(0)[1].upper()
        ident = _CamelCase_re.sub(_capitalize_follower, ident)
    if _PrefixDigit_re.match(ident):
        ident = 'n' + ident
    if not ident:
        ident = 'emptyString'
    return ident

190

def MakeModuleElement (s):
    """Convert a string into something that can be a valid element in
    a Python module path.

    Module path elements are similar to identifiers, but may begin
    with numbers and should not have leading underscores removed.
    The string is processed by L{_XMLIdentifierToPython}, after which
    dashes, spaces, and periods are replaced by underscores.
    """
    converted = _XMLIdentifierToPython(s)
    return _UnderscoreSubstitute_re.sub('_', converted)

# Imported here, next to its only use, so that the keyword table
# tracks whatever interpreter is running.
import keyword

_PythonKeywords = frozenset( (
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield"
  ) ).union(keyword.kwlist)
"""Python keywords.  Note that types like int and float are not
keywords.

The explicit list holds the Python 2 keywords (including C{exec} and
C{print}); it is combined with the running interpreter's
C{keyword.kwlist} so that keywords introduced later (e.g.
C{nonlocal}, C{async}, C{await}) are also recognized.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset( (
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    # "set" is neither a keyword nor a constant, but if some fool
    # like {http://www.w3.org/2001/SMIL20/}set gets defined there's
    # no way to access the builtin constructor.
    "set"
    ) )
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """If the provided string C{s} matches a Python language keyword,
    append an underscore to distinguish them.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional iterable of additional strings
    that should be treated as keywords.

    @return: C{s} with a single underscore appended if it is in
    L{_Keywords} or in C{aux_keywords}; otherwise C{s} unchanged.

    @rtype: C{str}

    """
    if (s in _Keywords) or (s in aux_keywords):
        return '%s_' % (s,)
    return s

243

def MakeUnique (s, in_use):
    """Return an identifier based on C{s} that is not in the given set.

    If C{s} is already taken, its trailing underscores are removed and
    a single underscore is appended; if that is taken too, a serial
    number starting at 2 follows the underscore.

    The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...

    @param in_use: The set of identifiers already in use in the
    relevant scope.  C{in_use} is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    unique = s
    if unique in in_use:
        stem = s.rstrip('_')
        unique = '%s_' % (stem,)
        serial = 2
        while unique in in_use:
            unique = '%s_%d' % (stem, serial)
            serial += 1
    in_use.add(unique)
    return unique

268

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    The string is converted with L{MakeIdentifier}, leading and
    trailing underscores are stripped, keyword conflicts are resolved
    with L{DeconflictKeyword}, the visibility prefix (if any) is
    added, and the result is made unique with L{MakeUnique}.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is C{True}.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    base = MakeIdentifier(s).strip('_')
    base = DeconflictKeyword(base, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + base, in_use)

304

# @todo: descend from pyxb.cscRoot, if we import pyxb
class _DeconflictSymbols_mixin (object):
    """Mix-in used to deconflict public symbols in classes that may be
    inherited by generated binding classes.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  It is possible that an XML schema might
    include tags and attribute names that match these symbols.  To
    avoid conflict, the reserved symbols marked in this class are
    added to the pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    #: There are no reserved symbols in the base class.
    _ReservedSymbols = set()

# Regular expression matching a single tab, line feed, or carriage return
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression matching a run of one or more spaces
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Normalize the whitespace in the given string.

    The caller is expected to set exactly one of the C{preserve},
    C{replace}, and C{collapse} keyword parameters to C{True}.  If
    more than one is set, the first of C{preserve}, C{replace},
    C{collapse} (in that order) wins.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: every tab, newline, and carriage return is replaced
       with an ASCII space.

     - C{collapse}: the C{replace} normalization is done, then each
       run of spaces is reduced to a single space and leading and
       trailing whitespace is removed.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @raise Exception: none of the three keywords was true.

    @rtype: C{str}
    """
    if preserve:
        return text
    replaced = __TabCRLF_re.sub(' ', text)
    if replace:
        return replaced
    if not collapse:
        # pyxb not imported here; could be.
        raise Exception('NormalizeWhitespace: No normalization specified')
    return __MultiSpace_re.sub(' ', replaced).strip()

370

class Graph (object):
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes are stored in sets and used as dictionary keys, so they must
    be hashable.
    """

    # Class-level defaults, so the cache accessors can test for "not
    # yet computed" before the corresponding computation has run.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: if not C{None}, a node recorded as the initial
        (sole) member of the root set.  The node is not added to the
        node set by this call.
        """
        self.__roots = None
        if root is not None:
            self.__roots = set([root])
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop does not count as an incoming edge when roots
        # are determined.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those
        that have no incoming edges from other nodes).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any nodes that had been manually added
        using L{addRoot} will no longer be in the set unless they
        have no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set(n for n in self.__nodes if n not in self.__reverseMap)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The traversal starts from each node returned by L{roots}.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged.

        @raise Exception: the graph has nodes but no roots.  Nothing
        is cached in that case, so a later call fails the same way
        rather than returning empty results."""

        if (self.__scc is not None) and (not reset):
            return
        # Validate before touching any cached state.
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        self.__sccMap = { }
        self.__stack = []
        # Mirrors the content of __stack for constant-time membership tests.
        self.__onStack = set()
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        for r in roots:
            self._tarjan(r)

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        self.__onStack.add(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Unvisited: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__onStack:
                # Visited and still open: part of the current component.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the entry node of a component: everything above it
            # on the stack belongs to that component.
            scc = []
            while True:
                member = self.__stack.pop()
                self.__onStack.discard(member)
                scc.append(member)
                if v == member:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph that
        contain more than one node.

        The data structure is a list, each element of which is itself
        a list containing two or more nodes from the graph.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a component earlier in the list indicates that it
        has no dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC (a list of nodes), or C{None} if the node is
        not present in the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        The value is computed as the number of edges, minus the number
        of nodes, plus twice the number of multi-node
        strongly-connected components (see L{scc})."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        # Post-order walk: a node is appended only after everything
        # reachable from it has been appended.
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a Graphviz DOT description of the graph.

        @keyword title: the name given to the digraph.

        @keyword labeller: optional callable that maps a node to its
        label text; if C{None}, C{str} is applied to the node.  Labels
        are inserted verbatim (no escaping is performed).

        @rtype: C{str}
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                # Self-loops are omitted from the drawing.
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list in which each node appears after
        the nodes reachable from it.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and
        recompute the order (the root set is recomputed as well).

        @raise Exception: the walk from the roots did not reach every
        node in the graph."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

    def rootSetOrder (self):
        """Return the nodes of the graph as a sequence of sets that
        impose a partial order.

        The first set holds the nodes that have no outgoing edges,
        i.e. those that depend on nothing.  Each later set holds the
        nodes whose outgoing edges all lead to nodes in earlier sets.
        This continues until all nodes have been placed.  The sets
        impose a partial order on the nodes, without being as
        constraining as L{sccOrder}.

        @return: a list of the sets, or C{None} (after logging an
        error) if a dependency cycle, including a self-loop, prevents
        some node from ever being placed."""
        order = []
        nodes = set(self.__nodes)
        # Private copy of the source -> targets map, so it can be pruned.
        edge_map = {}
        for (source, targets) in six.iteritems(self.__edgeMap):
            edge_map[source] = targets.copy()
        while nodes:
            # Nodes with no unresolved outgoing edges are free.
            freeset = set(n for n in nodes if n not in edge_map)
            if 0 == len(freeset):
                _log.error('dependency cycle in named components')
                return None
            order.append(freeset)
            nodes.difference_update(freeset)
            new_edge_map = {}
            for (source, targets) in six.iteritems(edge_map):
                targets.difference_update(freeset)
                if 0 != len(targets):
                    new_edge_map[source] = targets
            edge_map = new_edge_map
        return order

# The prefix rewrite map NormalizeLocation uses when the caller does
# not supply one.  Updated in place by SetLocationPrefixRewriteMap.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Set the map that is used by L{NormalizeLocation} to rewrite URI
    prefixes.

    The module-level map is emptied and then refilled from
    C{prefix_map}; the map object itself is retained, so existing
    references to it observe the new content.

    @param prefix_map: a mapping from a URI prefix to the string that
    replaces it.
    """
    rewrite_map = LocationPrefixRewriteMap_
    rewrite_map.clear()
    rewrite_map.update(prefix_map)

686

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    If the result contains no colon (and so no URI scheme), it is
    treated as a file system path and normalized with
    C{os.path.realpath}.

    Optionally, the resulting absolute URI can subsequently be
    rewritten to replace specified prefix strings with alternative
    strings, e.g. to convert a remote URI to a local repository.  This
    rewriting is done after the conversion to an absolute URI, but
    before normalizing file system URIs.  Every entry of the map whose
    prefix matches is applied, in the map's iteration order.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    """
    if uri is None:
        return uri
    abs_uri = uri if parent_uri is None else urlparse.urljoin(parent_uri, uri)
    rewrite_map = LocationPrefixRewriteMap_ if prefix_map is None else prefix_map
    for (old_prefix, new_prefix) in six.iteritems(rewrite_map):
        if abs_uri.startswith(old_prefix):
            abs_uri = new_prefix + abs_uri[len(old_prefix):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

725

def DataFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as raw data.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.  A uri that does
    contain a colon is first tried as a URL and, if that fails, as a
    local file path.

    The stream the data is read from is closed before returning.

    @param uri: the location to read.

    @keyword archive_directory: if set, and the data did not come
    from a local file, a copy of the data is written into this
    directory under the base name of the uri path (with a numeric
    suffix if that name is already taken).  Failure to save the copy
    is logged as a warning, not raised.

    @return: the data read from the stream.

    @raise Exception: whatever the first failed open attempt raised,
    if the uri could not be opened at all.
    """

    from pyxb.utils.six.moves.urllib.request import urlopen
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urlopen(uri)
        except Exception as e:
            exc = e
        if (stream is None) and six.PY2:
            import urllib
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the exception from the first attempt
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri, 'rb')
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    try:
        try:
            # Protect this in case whatever stream is doesn't have an fp
            # attribute.  Data read from a local file is not archived.
            if isinstance(stream, six.moves.file) or isinstance(stream.fp, six.moves.file):
                archive_directory = None
        except Exception:
            pass
        xmld = stream.read()
    finally:
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmld)
            finally:
                archive_fp.close()
        except OSError as e:
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmld

783

def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories in the path to the file are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.
    If an error occurs after the file has been opened, the file is
    closed before the exception propagates.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8, with undecodable bytes replaced)
    causes an C{OSError} to be raised with C{errno} set to C{EEXIST}.
    I.e., only files with this value in the first 4KB will be returned
    for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: the directory could not be created, or the tag
    check failed.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is fine.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            fp.seek(0, os.SEEK_SET)
            # The 4096-byte cut may split a multi-byte character, so
            # decode leniently rather than failing on the fragment.
            blockt = fp.read(4096).decode('utf-8', 'replace')
            if 0 > blockt.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0, os.SEEK_SET)
            fp.truncate()
        else:
            fp.seek(0, os.SEEK_END)
    except BaseException:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6, so
# take the SHA-1 constructor from whichever module is available.
try:
    import hashlib
except ImportError:
    import sha
    __Hasher = sha.new
else:
    __Hasher = hashlib.sha1

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    payload = text.encode('utf-8') if isinstance(text, six.text_type) else text
    return __Hasher(payload).hexdigest()

# uuid didn't show up until 2.5; without it, fall back on random.
try:
    import uuid
except ImportError:
    import random
    __HaveUUID = False
else:
    __HaveUUID = True

def _NewUUIDString ():
    """Obtain a UUID using the best available method.

    When the C{uuid} module is available the result is the URN form
    of a C{uuid.uuid1} value.  Otherwise it is a string combining the
    current date and time (to the second) with a random number.

    @rtype: C{str}
    """
    if not __HaveUUID:
        return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))
    return uuid.uuid1().urn

868

869 -class UniqueIdentifier (object):

870 """Records a unique identifier, generally associated with a 871 binding generation action. 872 873 The identifier is a string, but gets wrapped in an instance of 874 this class to optimize comparisons and reduce memory footprint. 875 876 Invoking the constructor for this class on the same string 877 multiple times will return the same Python object. 878 879 An instance of this class compares equal to, and hashes equivalent 880 to, the uid string. When C{str}'d, the result is the uid; when 881 C{repr}'d, the result is a constructor call to 882 C{pyxb.utils.utility.UniqueIdentifier}. 883 """ 884 885 # A map from UID string to the instance that represents it 886 __ExistingUIDs = {} 887

888 - def uid (self):

889 """The string unique identifier""" 890 return self.__uid

891 __uid = None 892 893 # Support pickling, which is done using only the UID.

894 - def __getnewargs__ (self):

895 return (self.__uid,)

896

897 - def __getstate__ (self):

898 return self.__uid

899

900 - def __setstate__ (self, state):

901 assert self.__uid == state

902 903 # Singleton-like

904 - def __new__ (cls, *args):

905 if 0 == len(args): 906 uid = _NewUUIDString() 907 else: 908 uid = args[0] 909 if isinstance(uid, UniqueIdentifier): 910 uid = uid.uid() 911 if not isinstance(uid, six.string_types): 912 raise TypeError('UniqueIdentifier uid must be a string') 913 rv = cls.__ExistingUIDs.get(uid) 914 if rv is None: 915 rv = super(UniqueIdentifier, cls).__new__(cls) 916 rv.__uid = uid 917 cls.__ExistingUIDs[uid] = rv 918 return rv

919

920 - def associateObject (self, obj):

921 """Associate the given object witth this identifier. 922 923 This is a one-way association: the object is not provided with 924 a return path to this identifier instance.""" 925 self.__associatedObjects.add(obj)

926 - def associatedObjects (self):

927 """The set of objects that have been associated with this 928 identifier instance.""" 929 return self.__associatedObjects

930 __associatedObjects = None 931

932 - def __init__ (self, uid=None):

933 """Create a new UniqueIdentifier instance. 934 935 @param uid: The unique identifier string. If present, it is 936 the callers responsibility to ensure the value is universally 937 unique. If C{None}, one will be provided. 938 @type uid: C{str} or C{unicode} 939 """ 940 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid()) 941 self.__associatedObjects = set()

942

def __eq__ (self, other):
    """Compare this identifier with another identifier or a string.

    C{None} never compares equal.  Another L{UniqueIdentifier} is
    compared by its UID string, and a string is compared directly
    with this instance's UID.

    @raise TypeError: C{other} is of any other type.
    """
    if other is None:
        return False
    if isinstance(other, UniqueIdentifier):
        return self.uid() == other.uid()
    if isinstance(other, six.string_types):
        return self.uid() == other
    raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))

953

def __hash__ (self):
    """Hash exactly as the UID string does, so an instance and its
    UID string land in the same dictionary slot."""
    identifier = self.uid()
    return hash(identifier)

956

def __str__ (self):
    """The string form of an identifier is its UID."""
    identifier = self.uid()
    return identifier

959

def __repr__ (self):
    """Return text that reads as a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier} with this UID."""
    quoted_uid = repr(self.uid())
    return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (quoted_uid,)

962

@BackfillComparisons
class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.  Instances hash
    and compare by their offset in minutes east of UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes.
    # A raw string keeps the \d escapes intact for the regex engine.
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    # Range limits
    __MaxOffset_td = datetime.timedelta(hours=14)

    @staticmethod
    def __TimedeltaToMinutes (td):
        """Convert a C{datetime.timedelta} to whole minutes.

        Both C{days} and C{seconds} are used: a negative timedelta is
        stored with a negative C{days} and a positive C{seconds}, so
        looking at C{seconds} alone gives the wrong sign and size.
        """
        return (td.days * 86400 + td.seconds) // 60

    def __init__ (self, spec=None):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta}, the value
        C{None} (equal to 0 minutes east), or a string that conforms
        to the ISO8601 time zone sequence (B{Z}, or B{[+-]HH:MM}).

        @raise ValueError: a string spec is malformed, or the offset
        is more than 14 hours either side of UTC.
        @raise TypeError: C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, six.string_types):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            elif isinstance(spec, six.integer_types):
                self.__utcOffset_min = spec
            elif isinstance(spec, datetime.timedelta):
                self.__utcOffset_min = self.__TimedeltaToMinutes(spec)
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
            raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
        # Cache the lexical form that tzname() reports.
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in the format expected by XML Schema."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

    def __otherForComparison (self, other):
        """Return the value that C{other} contributes to a comparison.

        For another L{UTCOffsetTimeZone} this is its offset in minutes.
        For anything else, C{other.utcoffset()} is evaluated at the
        current time; a C{datetime.timedelta} result is converted to
        minutes so it is comparable with this instance's integer
        offset, and any other result is passed through unchanged.
        """
        if isinstance(other, UTCOffsetTimeZone):
            return other.__utcOffset_min
        offset = other.utcoffset(datetime.datetime.now())
        if isinstance(offset, datetime.timedelta):
            return self.__TimedeltaToMinutes(offset)
        return offset

    def __hash__ (self):
        return hash(self.__utcOffset_min)

    def __eq__ (self, other):
        return self.__utcOffset_min == self.__otherForComparison(other)

    def __lt__ (self, other):
        return self.__utcOffset_min < self.__otherForComparison(other)

1047

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass for the local time zone.

    Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
    The standard and daylight offsets are read from the C{time} module
    once, when the class body is executed.
    """

    __ZeroDelta = datetime.timedelta(0)
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Without a daylight zone the "DST" offset is just the standard one.
    __DSTOffset = datetime.timedelta(seconds=-time.altzone) if time.daylight else __STDOffset
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the local offset from UTC in effect at C{dt}."""
        return self.__DSTOffset if self.__isDST(dt) else self.__STDOffset

    def dst (self, dt):
        """Return the daylight-saving adjustment in effect at C{dt}."""
        return self.__DSTDelta if self.__isDST(dt) else self.__ZeroDelta

    def tzname (self, dt):
        """Return the entry of C{time.tzname} that applies at C{dt}."""
        in_dst = self.__isDST(dt)
        return time.tzname[in_dst]

    def __isDST (self, dt):
        """Tell whether the C library considers C{dt} to be in DST.

        The date and time fields of C{dt} go through C{time.mktime}
        with the DST flag set to -1 (unknown), and the flag on the
        resulting local time is inspected.
        """
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        local = time.localtime(time.mktime(fields))
        return local.tm_isdst > 0

1080

class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    This class defines a C{__getstate__} method which returns a copy
    of C{self.__dict__} with certain members removed.  Specifically,
    if a string "s" appears in a class member variable named
    C{__PrivateTransient} defined in the "Class" class, then the
    corresponding private variable "_Class__s" will be removed from
    the state dictionary.  This is used to eliminate unnecessary
    fields from instances placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without having
    to implement a C{__getstate__} method in every class in the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    If you use this, it is your responsibility to define the
    C{__PrivateTransient} class variable and add to it the required
    variable names.

    Classes that inherit from this are free to define their own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  If you do this, be sure that the class defining
    C{__getstate__} lists L{PrivateTransient_mixin} as one of its
    direct superclasses, lest the latter end up earlier in the mro and
    consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the
        members that the class hierarchy declares transient.

        The set of names to drop is computed on first use and cached
        on the instance's class.  A listed member is dropped only when
        its current value is not C{None}.
        """
        klass = self.__class__
        suffix = self.__Attribute
        # Note that the aggregate set is stored in a class variable
        # with a slightly different name than the class-level set.
        cache_name = '_%s%s_' % (klass.__name__, suffix)
        transient = getattr(klass, cache_name, None)
        if transient is None:
            transient = set()
            for ancestor in klass.mro():
                for (member, names) in six.iteritems(ancestor.__dict__):
                    if not member.endswith(suffix):
                        continue
                    # What precedes the suffix is the "_Class" part of
                    # the mangled private names.
                    owner = member[:-len(suffix)]
                    transient.update('%s__%s' % (owner, _n) for _n in names)
            setattr(klass, cache_name, transient)
        state = self.__dict__.copy()
        for key in transient:
            if state.get(key) is not None:
                del state[key]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in six.iteritems(state):
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state

1143

def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile('.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a file rather than a directory
    is itself returned if it satisfies C{pattern}.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.  It is substituted at most once.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: The matching paths, in the order they were found.
    """
    found = []
    pending = path.split(os.pathsep)
    while pending:
        entry = pending.pop(0)
        if entry == default_path_wildcard:
            if default_path is not None:
                # Splice the default entries in where the wildcard was.
                pending[:0] = default_path.split(os.pathsep)
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if (pattern is None) or (pattern.search(entry) is not None):
                found.append(entry)
            continue
        for (root, _dirs, names) in os.walk(entry):
            found.extend(os.path.join(root, name)
                         for name in names
                         if (pattern is None) or (pattern.search(name) is not None))
            if not recursive:
                # Only the top directory was requested.
                break
    return found

1216

@BackfillComparisons
class Location (object):
    """A position within a document: a base location plus optional
    line and column numbers.

    Instances hash and compare by C{locationBase}, then
    C{lineNumber}, then C{columnNumber}.  Within a field, a value of
    C{None} orders after any other value.
    """

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @param location_base: Identifies the document; C{__str__}
        treats it as a URI.  A C{str} value is interned.
        @param line_number: Line within the document, or C{None}.
        @param column_number: Column within the line, or C{None}.
        """
        if isinstance(location_base, str):
            location_base = six.moves.intern(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create a location with the same base as this one.

        @param locator: Optional object providing C{getLineNumber()}
        and C{getColumnNumber()}.  When given, its values replace the
        explicit arguments.  This is best-effort: if the locator
        raises, whatever values were already in hand are used.
        @param line_number: Line number used when no locator is given.
        @param column_number: Column number used when no locator is given.
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Deliberately ignored, but do not trap
                # KeyboardInterrupt or SystemExit.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __cmpSingleUnlessNone (self, v1, v2):
        """Three-way comparison of one field.

        Returns C{None} when both values are C{None}, so the caller
        moves on to the next field; otherwise returns -1, 0 or 1,
        with C{None} ordered after any other value.
        """
        if v1 is None:
            if v2 is None:
                return None
            return 1
        if v2 is None:
            return -1
        if v1 < v2:
            return -1
        if v1 == v2:
            return 0
        return 1

    def __cmpTupleUnlessNone (self, v1, v2):
        """Compare two locations field by field.

        A later field is consulted only when the earlier field is
        C{None} on both sides.  Returns C{None} when every field is
        C{None} on both sides.
        """
        rv = self.__cmpSingleUnlessNone(v1.__locationBase, v2.__locationBase)
        if rv is None:
            rv = self.__cmpSingleUnlessNone(v1.__lineNumber, v2.__lineNumber)
        if rv is None:
            rv = self.__cmpSingleUnlessNone(v1.__columnNumber, v2.__columnNumber)
        return rv

    def __hash__ (self):
        return hash((self.__locationBase, self.__lineNumber, self.__columnNumber))

    def __eq__ (self, other):
        """Comparison by locationBase, then lineNumber, then columnNumber.

        Anything that is not a L{Location}, including C{None},
        compares unequal instead of raising C{AttributeError}.
        """
        if not isinstance(other, Location):
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return True
        return 0 == rv

    def __lt__ (self, other):
        """Ordering by locationBase, then lineNumber, then columnNumber.

        Nothing is less than C{None}.
        """
        if other is None:
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return False
        return -1 == rv

    def __str__ (self):
        if self.locationBase is None:
            lb = '<unknown>'
        else:
            # No, this should not be os.sep.  The location is
            # expected to be a URI.
            lb = self.locationBase.rsplit('/', 1)[-1]
        return '%s[%s:%s]' % (lb, self.lineNumber, self.columnNumber)

    def __repr__ (self):
        t = type(self)
        ctor = '%s.%s' % (t.__module__, t.__name__)
        return '%s(%s, %r, %r)' % (ctor, repr2to3(self.__locationBase), self.__lineNumber, self.__columnNumber)

1297

class Locatable_mixin (pyxb.cscRoot):
    """Mixin that lets an object carry a location.

    The location is taken from the C{location} keyword at
    construction time and can be replaced later.
    """

    __location = None

    def __init__ (self, *args, **kw):
        """Strip the C{location} keyword, defaulting to C{None}, and
        pass all remaining arguments up the constructor chain."""
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the location recorded for this object."""
        self.__location = location

    def _location (self):
        """Return the location recorded for this object, or C{None}
        if there is none."""
        return self.__location

1310

def repr2to3 (v):
    """Filtered built-in repr for python 2/3 compatibility in
    generated bindings.

    All generated string values are to be unicode.  We always import
    unicode_literals from __future__, so we want plain quotes with no
    prefix u.  Strip that off.

    Integer constants should not have the suffix L even if they do not
    fit in a Python2 int.  The references generated through this
    function are never used for calculations, so the implicit cast to
    a larger type is sufficient.

    All other values use their standard representations.

    @param v: The value to represent.
    @return: The filtered representation, as a string.
    """
    if isinstance(v, six.string_types):
        quoted = QuotedEscaped(v)
        # Drop a leading unicode marker, if any.
        return quoted[1:] if 'u' == quoted[0] else quoted
    text = repr(v)
    if isinstance(v, six.integer_types) and text.endswith('L'):
        # Drop the Python 2 long suffix.
        return text[:-1]
    return text

1337

Source Code for Module pyxb.utils.utility