1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16  """Utility functions and classes.""" 
  17   
  18  import re 
  19  import os 
  20  import errno 
  21  import pyxb 
  22  from pyxb.utils.six.moves.urllib import parse as urlparse 
  23  import time 
  24  import datetime 
  25  import logging 
  26  from pyxb.utils import six 
  27   
  28  _log = logging.getLogger(__name__) 
  31      """A dummy class used to hold arbitrary attributes. 
  32   
  33      Essentially this gives us a map without having to worry about 
  34      converting names to text to use as keys. 
  35      """ 
  36      pass 
   37   
  39      """Class decorator that fills in missing ordering methods. 
  40   
  41      Concept derived from Python 2.7.5 functools.total_ordering, 
  42      but this version requires that __eq__ and __lt__ be provided, 
  43      and unconditionally overrides __ne__, __gt__, __le__, and __ge__ 
  44      with the derived versions. 
  45   
  46      This is still necessary in Python 3 because in Python 3 the 
  47      comparison x >= y is done by the __ge__ inherited from object, 
  48      which does not handle the case where x and y are not the same type 
  49      even if the underlying y < x would convert x to be compatible. """ 
  50   
  51      def applyconvert (cls, derived): 
  52          for (opn, opx) in derived: 
  53              opx.__name__ = opn 
  54              opx.__doc__ = getattr(int, opn).__doc__ 
  55              setattr(cls, opn, opx) 
   56   
  57      applyconvert(cls, ( 
  58              ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))), 
  59              ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)), 
  60              ('__ge__', lambda self, other: not self.__lt__(other)) 
  61              )) 
  62      applyconvert(cls, ( 
  63              ('__ne__', lambda self, other: not self.__eq__(other)), 
  64              )) 
  65      return cls 
  66   
  68      """Tuple comparison that permits C{None} as lower than any value, 
  69      and defines other cross-type comparison. 
  70   
  71      @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs.""" 
  72      li = iter(lhs) 
  73      ri = iter(rhs) 
  74      while True: 
  75          try: 
  76              (lv, rv) = (next(li), next(ri)) 
  77              if lv is None: 
  78                  if rv is None: 
  79                      continue 
  80                  return -1 
  81              if rv is None: 
  82                  return 1 
  83              if lv == rv: 
  84                  continue 
  85              if lv < rv: 
  86                  return -1 
  87              return 1 
  88          except StopIteration: 
  89              nl = len(lhs) 
  90              nr = len(rhs) 
  91              if nl < nr: 
  92                  return -1 
  93              if nl == nr: 
  94                  return 0 
  95              return 1 
   96   
  98      """Convert a string into a literal value that can be used in Python source. 
  99   
 100      This just calls C{repr}.  No point in getting all complex when the language 
 101      already gives us what we need. 
 102   
 103      @rtype: C{str} 
 104      """ 
 105      return repr(s) 
  106   
 108      """Default implementation for _XMLIdentifierToPython 
 109   
 110      For historical reasons, this converts the identifier from a str to 
 111      unicode in the system default encoding.  This should have no 
 112      practical effect. 
 113   
 114      @param identifier : some XML identifier 
 115   
 116      @return: C{unicode(identifier)} 
 117      """ 
 118   
 119      return six.text_type(identifier) 
  120   
  122      """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier. 
 123   
 124      In Python3, identifiers can be full Unicode tokens, but in Python2, 
 125      all identifiers must be ASCII characters.  L{MakeIdentifier} enforces 
 126      this by removing all characters that are not valid within an 
 127      identifier. 
 128   
 129      In some cases, an application generating bindings may be able to 
 130      transliterate Unicode code points that are not valid Python identifier 
 131      characters into something else.  This callable can be assigned to 
 132      perform that translation before the invalid characters are 
 133      stripped. 
 134   
 135      It is not the responsibility of this callable to do anything other 
 136      than replace whatever characters it wishes to.  All 
 137      transformations performed by L{MakeIdentifier} will still be 
 138      applied, to ensure the output is in fact a legal identifier. 
 139   
 140      @param xml_identifier_to_python : A callable that takes a string 
 141      and returns a Unicode, possibly with non-identifier characters 
 142      replaced by other characters.  Pass C{None} to reset to the 
 143      default implementation, which is L{_DefaultXMLIdentifierToPython}. 
 144   
 145      @rtype: C{unicode} 
 146      """ 
 147      global _XMLIdentifierToPython 
 148      if xml_identifier_to_python is None: 
 149          xml_identifier_to_python = _DefaultXMLIdentifierToPython 
 150      _XMLIdentifierToPython = xml_identifier_to_python 
  151   
 152  _XMLIdentifierToPython = _DefaultXMLIdentifierToPython 
 153   
 154  _UnderscoreSubstitute_re = re.compile(r'[- .]') 
 155  _NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]') 
 156  _PrefixUnderscore_re = re.compile(r'^_+') 
 157  _PrefixDigit_re = re.compile(r'^\d+') 
 158  _CamelCase_re = re.compile(r'_\w') 
 161      """Convert a string into something suitable to be a Python identifier. 
 162   
 163      The string is processed by L{_XMLIdentifierToPython}.  Following 
 164      this, dashes, spaces, and periods are replaced by underscores, and 
 165      characters not permitted in Python identifiers are stripped. 
 166      Furthermore, any leading underscores are removed.  If the result 
 167      begins with a digit, the character 'n' is prepended.  If the 
 168      result is the empty string, the string 'emptyString' is 
 169      substituted. 
 170   
 171      No check is made for L{conflicts with keywords <DeconflictKeyword>}. 
 172   
 173      @keyword camel_case : If C{True}, any underscore in the result 
 174      string that is immediately followed by an alphanumeric is replaced 
 175      by the capitalized version of that alphanumeric.  Thus, 
 176      'one_or_two' becomes 'oneOrTwo'.  If C{False} (default), has no 
 177      effect. 
 178   
 179      @rtype: C{str} 
 180      """ 
 181      s = _XMLIdentifierToPython(s) 
 182      s = _PrefixUnderscore_re.sub('', _NonIdentifier_re.sub('', _UnderscoreSubstitute_re.sub('_', s))) 
 183      if camel_case: 
 184          s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s) 
 185      if _PrefixDigit_re.match(s): 
 186          s = 'n' + s 
 187      if 0 == len(s): 
 188          s = 'emptyString' 
 189      return s 
  190   
 192      """Convert a string into something that can be a valid element in a 
 193      Python module path. 
 194   
 195      Module path elements are similar to identifiers, but may begin 
 196      with numbers and should not have leading underscores removed. 
 197      """ 
 198      return _UnderscoreSubstitute_re.sub('_', _XMLIdentifierToPython(s)) 
  199   
 200  _PythonKeywords = frozenset( ( 
 201          "and", "as", "assert", "break", "class", "continue", "def", "del", 
 202          "elif", "else", "except", "exec", "finally", "for", "from", "global", 
 203          "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", 
 204          "raise", "return", "try", "while", "with", "yield" 
 205          ) ) 
 206  """Python keywords.  Note that types like int and float are not 
 207  keywords. 
 208   
 209  @see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}.""" 
 210   
 211  _PythonBuiltInConstants = frozenset( ( 
 212          "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__", 
 213           
 214           
 215           
 216          "set" 
 217          ) ) 
 218  """Other symbols that aren't keywords but that can't be used. 
 219   
 220  @see: U{http://docs.python.org/library/constants.html}.""" 
 221   
 222  _Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants)) 
 223  """The keywords reserved for Python, derived from L{_PythonKeywords} 
 224  and L{_PythonBuiltInConstants}.""" 
 227      """If the provided string C{s} matches a Python language keyword, 
 228      append an underscore to distinguish them. 
 229   
 230      See also L{MakeUnique}. 
 231   
 232      @param s: string to be deconflicted 
 233   
 234      @keyword aux_keywords: optional iterable of additional strings 
 235      that should be treated as keywords. 
 236   
 237      @rtype: C{str} 
 238   
 239      """ 
 240      if (s in _Keywords) or (s in aux_keywords): 
 241          return '%s_' % (s,) 
 242      return s 
  243   
 245      """Return an identifier based on C{s} that is not in the given set. 
 246   
 247      The returned identifier is made unique by appending an underscore 
 248      and, if necessary, a serial number. 
 249   
 250      The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ... 
 251   
 252      @param in_use: The set of identifiers already in use in the 
 253      relevant scope.  C{in_use} is updated to contain the returned 
 254      identifier. 
 255   
 256      @rtype: C{str} 
 257      """ 
 258      if s in in_use: 
 259          ctr = 2 
 260          s = s.rstrip('_') 
 261          candidate = '%s_' % (s,) 
 262          while candidate in in_use: 
 263              candidate = '%s_%d' % (s, ctr) 
 264              ctr += 1 
 265          s = candidate 
 266      in_use.add(s) 
 267      return s 
  268   
 269 -def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False): 
  270      """Combine everything required to create a unique identifier. 
 271   
 272      Leading and trailing underscores are stripped from all 
 273      identifiers. 
 274   
 275      @param in_use: the set of already used identifiers.  Upon return 
 276      from this function, it is updated to include the returned 
 277      identifier. 
 278   
 279      @keyword aux_keywords: an optional set of additional symbols that 
 280      are illegal in the given context; use this to prevent conflicts 
 281      with known method names. 
 282   
 283      @keyword private: if C{False} (default), all leading underscores 
 284      are stripped, guaranteeing the identifier will not be private.  If 
 285      C{True}, the returned identifier has two leading underscores, 
 286      making it a private variable within a Python class. 
 287   
 288      @keyword protected: as for C{private}, but uses only one 
 289      underscore. 
 290   
 291      @rtype: C{str} 
 292   
 293      @note: Only module-level identifiers should be treated as 
 294      protected.  The class-level L{_DeconflictSymbols_mixin} 
 295      infrastructure does not include protected symbols.  All class and 
 296      instance members beginning with a single underscore are reserved 
 297      for the PyXB infrastructure.""" 
 298      s = DeconflictKeyword(MakeIdentifier(s).strip('_'), aux_keywords) 
 299      if private: 
 300          s = '__' + s 
 301      elif protected: 
 302          s = '_' + s 
 303      return MakeUnique(s, in_use) 
  304   
 307      """Mix-in used to deconflict public symbols in classes that may be 
 308      inherited by generated binding classes. 
 309   
 310      Some classes, like the L{pyxb.binding.basis.element} or 
 311      L{pyxb.binding.basis.simpleTypeDefinition} classes in 
 312      L{pyxb.binding.basis}, have public symbols associated with 
 313      functions and variables.  It is possible that an XML schema might 
 314      include tags and attribute names that match these symbols.  To 
 315      avoid conflict, the reserved symbols marked in this class are 
 316      added to the pre-defined identifier set. 
 317   
 318      Subclasses should create a class-level variable 
 319      C{_ReservedSymbols} that contains a set of strings denoting the 
 320      symbols reserved in this class, combined with those from any 
 321      superclasses that also have reserved symbols.  Code like the 
 322      following is suggested:: 
 323   
 324         # For base classes (direct mix-in): 
 325         _ReservedSymbols = set([ 'one', 'two' ]) 
 326         # For subclasses: 
 327         _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ])) 
 328   
 329      Only public symbols (those with no underscores) are currently 
 330      supported.  (Private symbols can't be deconflicted that easily, 
 331      and no protected symbols that derive from the XML are created by 
 332      the binding generator.) 
 333      """ 
 334   
 335      _ReservedSymbols = set() 
 336      """There are no reserved symbols in the base class.""" 
  337   
 338   
 339  __TabCRLF_re = re.compile("[\t\n\r]") 
 340   
 341  __MultiSpace_re = re.compile(" +") 
 344      """Normalize the given string. 
 345   
 346      Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword 
 347      parameters must be assigned the value C{True} by the caller. 
 348   
 349       - C{preserve}: the text is returned unchanged. 
 350   
 351       - C{replace}: all tabs, newlines, and carriage returns are 
 352       replaced with ASCII spaces. 
 353   
 354       - C{collapse}: the C{replace} normalization is done, then 
 355       sequences of two or more spaces are replaced by a single space. 
 356   
 357      See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}. 
 358   
 359      @rtype: C{str} 
 360      """ 
 361      if preserve: 
 362          return text 
 363      text = __TabCRLF_re.sub(' ', text) 
 364      if replace: 
 365          return text 
 366      if collapse: 
 367          return __MultiSpace_re.sub(' ', text).strip() 
 368       
 369      raise Exception('NormalizeWhitespace: No normalization specified') 
  370   
 372      """Represent a directed graph with arbitrary objects as nodes. 
 373   
 374      This is used in the L{code 
 375      generator<pyxb.binding.generate.Generator>} to determine order 
 376      dependencies among components within a namespace, and schema that 
 377      comprise various namespaces.  An edge from C{source} to C{target} 
 378      indicates that some aspect of C{source} requires that some aspect 
 379      of C{target} already be available. 
 380      """ 
 381   
 383          self.__roots = None 
 384          if root is not None: 
 385              self.__roots = set([root]) 
 386          self.__edges = set() 
 387          self.__edgeMap = { } 
 388          self.__reverseMap = { } 
 389          self.__nodes = set() 
  390   
 391      __scc = None 
 392      __sccMap = None 
 393      __dfsOrder = None 
 394   
 395 -    def addEdge (self, source, target): 
  396          """Add a directed edge from the C{source} to the C{target}. 
 397   
 398          The nodes are added to the graph if necessary. 
 399          """ 
 400          self.__edges.add( (source, target) ) 
 401          self.__edgeMap.setdefault(source, set()).add(target) 
 402          if source != target: 
 403              self.__reverseMap.setdefault(target, set()).add(source) 
 404          self.__nodes.add(source) 
 405          self.__nodes.add(target) 
  406   
 408          """Add  the given node to the graph.""" 
 409          self.__nodes.add(node) 
  410   
 411      __roots = None 
 412 -    def roots (self, reset=False): 
  413          """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges). 
 414   
 415          This caches the roots calculated in a previous invocation 
 416          unless the C{reset} keyword is given the value C{True}. 
 417   
 418          @note: Upon reset, any notes that had been manually added 
 419          using L{addNode} will no longer be in the set. 
 420   
 421          @keyword reset: If C{True}, any cached value is discarded and 
 422          recomputed.  No effect if C{False} (defalut). 
 423   
 424          @rtype: C{set} 
 425          """ 
 426          if reset or (self.__roots is None): 
 427              self.__roots = set() 
 428              for n in self.__nodes: 
 429                  if not (n in self.__reverseMap): 
 430                      self.__roots.add(n) 
 431          return self.__roots 
  433          """Add the provided node as a root node, even if it has incoming edges. 
 434   
 435          The node need not be present in the graph (if necessary, it is added). 
 436   
 437          Note that roots added in this way do not survive a reset using 
 438          L{roots}. 
 439   
 440          @return: C{self} 
 441          """ 
 442          if self.__roots is None: 
 443              self.__roots = set() 
 444          self.__nodes.add(root) 
 445          self.__roots.add(root) 
 446          return self 
  447   
 449          """Return the edges in the graph. 
 450   
 451          The edge data structure is a map from the source node to the 
 452          set of nodes that can be reached in a single step from the 
 453          source. 
 454          """ 
 455          return self.__edgeMap 
  456      __edgeMap = None 
 457   
 459          """Return the edges in the graph. 
 460   
 461          The edge data structure is a set of node pairs represented as C{( source, target )}. 
 462          """ 
 463          return self.__edges 
  464   
 466          """Return the set of nodes in the graph. 
 467   
 468          The node collection data structure is a set containing node 
 469          objects, whatever they may be.""" 
 470          return self.__nodes 
  471   
 472 -    def tarjan (self, reset=False): 
  473          """Execute Tarjan's algorithm on the graph. 
 474   
 475          U{Tarjan's 
 476          algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>} 
 477          computes the U{strongly-connected 
 478          components<http://en.wikipedia.org/wiki/Strongly_connected_component>} 
 479          of the graph: i.e., the sets of nodes that form a minimal 
 480          closed set under edge transition.  In essence, the loops.  We 
 481          use this to detect groups of components that have a dependency 
 482          cycle. 
 483   
 484          @keyword reset: If C{True}, any cached component set is erased 
 485          and recomputed.  If C{True}, an existing previous result is 
 486          left unchanged.""" 
 487   
 488          if (self.__scc is not None) and (not reset): 
 489              return 
 490          self.__sccMap = { } 
 491          self.__stack = [] 
 492          self.__sccOrder = [] 
 493          self.__scc = [] 
 494          self.__index = 0 
 495          self.__tarjanIndex = { } 
 496          self.__tarjanLowLink = { } 
 497          for v in self.__nodes: 
 498              self.__tarjanIndex[v] = None 
 499          roots = self.roots() 
 500          if (0 == len(roots)) and (0 < len(self.__nodes)): 
 501              raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),)) 
 502          for r in roots: 
 503              self._tarjan(r) 
 504          self.__didTarjan = True 
  505   
 507          """Do the work of Tarjan's algorithm for a given root node.""" 
 508          if self.__tarjanIndex.get(v) is not None: 
 509               
 510              return 
 511          self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index 
 512          self.__index += 1 
 513          self.__stack.append(v) 
 514          source = v 
 515          for target in self.__edgeMap.get(source, []): 
 516              if self.__tarjanIndex[target] is None: 
 517                  self._tarjan(target) 
 518                  self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target]) 
 519              elif target in self.__stack: 
 520                  self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target]) 
 521              else: 
 522                  pass 
 523   
 524          if self.__tarjanLowLink[v] == self.__tarjanIndex[v]: 
 525              scc = [] 
 526              while True: 
 527                  scc.append(self.__stack.pop()) 
 528                  if v == scc[-1]: 
 529                      break 
 530              self.__sccOrder.append(scc) 
 531              if 1 < len(scc): 
 532                  self.__scc.append(scc) 
 533                  [ self.__sccMap.setdefault(_v, scc) for _v in scc ] 
  534   
 535 -    def scc (self, reset=False): 
  536          """Return the strongly-connected components of the graph. 
 537   
 538          The data structure is a set, each element of which is itself a 
 539          set containing one or more nodes from the graph. 
 540   
 541          @see: L{tarjan}. 
 542          """ 
 543          if reset or (self.__scc is None): 
 544              self.tarjan(reset) 
 545          return self.__scc 
  546      __scc = None 
 547   
 548 -    def sccMap (self, reset=False): 
  549          """Return a map from nodes to the strongly-connected component 
 550          to which the node belongs. 
 551   
 552          @keyword reset: If C{True}, the L{tarjan} method will be 
 553          re-invoked, propagating the C{reset} value.  If C{False} 
 554          (default), a cached value will be returned if available. 
 555   
 556          @see: L{tarjan}. 
 557          """ 
 558          if reset or (self.__sccMap is None): 
 559              self.tarjan(reset) 
 560          return self.__sccMap 
  561      __sccMap = None 
 562   
 564          """Return the strongly-connected components in order. 
 565   
 566          The data structure is a list, in dependency order, of strongly 
 567          connected components (which can be single nodes).  Appearance 
 568          of a node in a set earlier in the list indicates that it has 
 569          no dependencies on any node that appears in a subsequent set. 
 570          This order is preferred over L{dfsOrder} for code generation, 
 571          since it detects loops. 
 572   
 573          @see: L{tarjan}. 
 574          """ 
 575          if reset or (self.__sccOrder is None): 
 576              self.tarjan(reset) 
 577          return self.__sccOrder 
  578      __sccOrder = None 
 579   
 581          """Return the strongly-connected component to which the given 
 582          node belongs. 
 583   
  584          Any keywords supplied when invoking this method are passed to 
 585          the L{sccMap} method. 
 586   
 587          @return: The SCC set, or C{None} if the node is not present in 
 588          the results of Tarjan's algorithm.""" 
 589   
 590          return self.sccMap(**kw).get(node) 
  591   
 593          """Return the cyclomatic complexity of the graph.""" 
 594          self.tarjan() 
 595          return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc) 
  596   
 598          assert not (source in self.__dfsWalked) 
 599          self.__dfsWalked.add(source) 
 600          for target in self.__edgeMap.get(source, []): 
 601              if not (target in self.__dfsWalked): 
 602                  self.__dfsWalk(target) 
 603          self.__dfsOrder.append(source) 
  604   
 606          node_map = { } 
 607          idx = 1 
 608          for n in self.__nodes: 
 609              node_map[n] = idx 
 610              idx += 1 
 611          text = [] 
 612          text.append('digraph "%s" {' % (title,)) 
 613          for n in self.__nodes: 
 614              if labeller is not None: 
 615                  nn = labeller(n) 
 616              else: 
 617                  nn = str(n) 
 618              text.append('%s [shape=box,label="%s"];' % (node_map[n], nn)) 
 619          for s in self.__nodes: 
 620              for d in self.__edgeMap.get(s, []): 
 621                  if s != d: 
 622                      text.append('%s -> %s;' % (node_map[s], node_map[d])) 
 623          text.append("};") 
 624          return "\n".join(text) 
  625   
 627          """Return the nodes of the graph in U{depth-first-search 
 628          order<http://en.wikipedia.org/wiki/Depth-first_search>}. 
 629   
 630          The data structure is a list.  Calculated lists are retained 
 631          and returned on future invocations, subject to the C{reset} 
 632          keyword. 
 633   
 634          @keyword reset: If C{True}, discard cached results and recompute the order.""" 
 635          if reset or (self.__dfsOrder is None): 
 636              self.__dfsWalked = set() 
 637              self.__dfsOrder = [] 
 638              for root in self.roots(reset=reset): 
 639                  self.__dfsWalk(root) 
 640              self.__dfsWalked = None 
 641              if len(self.__dfsOrder) != len(self.__nodes): 
 642                  raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes))) 
 643          return self.__dfsOrder 
  644   
 646          """Return the nodes of the graph as a sequence of root sets. 
 647   
 648          The first root set is the set of nodes that are roots: i.e., 
 649          have no incoming edges.  The second root set is the set of 
 650          nodes that have incoming nodes in the first root set.  This 
 651          continues until all nodes have been reached.  The sets impose 
 652          a partial order on the nodes, without being as constraining as 
 653          L{sccOrder}. 
 654   
 655          @return: a list of the root sets.""" 
 656          order = [] 
 657          nodes = set(self.__nodes) 
 658          edge_map = {} 
 659          for (d, srcs) in six.iteritems(self.__edgeMap): 
 660              edge_map[d] = srcs.copy() 
 661          while nodes: 
 662              freeset = set() 
 663              for n in nodes: 
 664                  if not (n in edge_map): 
 665                      freeset.add(n) 
 666              if 0 == len(freeset): 
 667                  _log.error('dependency cycle in named components') 
 668                  return None 
 669              order.append(freeset) 
 670              nodes.difference_update(freeset) 
 671              new_edge_map = {} 
 672              for (d, srcs) in six.iteritems(edge_map): 
 673                  srcs.difference_update(freeset) 
 674                  if 0 != len(srcs): 
 675                      new_edge_map[d] = srcs 
 676              edge_map = new_edge_map 
 677          return order 
   678   
 679  LocationPrefixRewriteMap_ = { } 
 686   
 688      """Normalize a URI against an optional parent_uri in the way that is 
 689      done for C{schemaLocation} attribute values. 
 690   
 691      If no URI schema is present, this will normalize a file system 
 692      path. 
 693   
 694      Optionally, the resulting absolute URI can subsequently be 
 695      rewritten to replace specified prefix strings with alternative 
 696      strings, e.g. to convert a remote URI to a local repository.  This 
 697      rewriting is done after the conversion to an absolute URI, but 
 698      before normalizing file system URIs. 
 699   
 700      @param uri : The URI to normalize.  If C{None}, function returns 
 701      C{None} 
 702   
 703      @param parent_uri : The base URI against which normalization is 
 704      done, if C{uri} is a relative URI. 
 705   
 706      @param prefix_map : A map used to rewrite URI prefixes.  If 
 707      C{None}, the value defaults to that stored by 
 708      L{SetLocationPrefixRewriteMap}. 
 709   
 710      """ 
 711      if uri is None: 
 712          return uri 
 713      if parent_uri is None: 
 714          abs_uri = uri 
 715      else: 
 716          abs_uri = urlparse.urljoin(parent_uri, uri) 
 717      if prefix_map is None: 
 718          prefix_map = LocationPrefixRewriteMap_ 
 719      for (pfx, sub) in six.iteritems(prefix_map): 
 720          if abs_uri.startswith(pfx): 
 721              abs_uri = sub + abs_uri[len(pfx):] 
 722      if 0 > abs_uri.find(':'): 
 723          abs_uri = os.path.realpath(abs_uri) 
 724      return abs_uri 
  725   
 728      """Retrieve the contents of the uri as raw data. 
 729   
 730      If the uri does not include a scheme (e.g., C{http:}), it is 
 731      assumed to be a file path on the local system.""" 
 732   
 733      from pyxb.utils.six.moves.urllib.request import urlopen 
 734      stream = None 
 735      exc = None 
 736       
 737       
 738       
 739      if 0 <= uri.find(':'): 
 740          try: 
 741              stream = urlopen(uri) 
 742          except Exception as e: 
 743              exc = e 
 744          if (stream is None) and six.PY2: 
 745              import urllib 
 746              try: 
 747                  stream = urllib.urlopen(uri) 
 748                  exc = None 
 749              except: 
 750                   
 751                  pass 
 752      if stream is None: 
 753           
 754          try: 
 755              stream = open(uri, 'rb') 
 756              exc = None 
 757          except Exception as e: 
 758              if exc is None: 
 759                  exc = e 
 760      if exc is not None: 
 761          _log.error('open %s', uri, exc_info=exc) 
 762          raise exc 
 763      try: 
 764           
 765           
 766          if isinstance(stream, six.moves.file) or isinstance(stream.fp, six.moves.file): 
 767              archive_directory = None 
 768      except: 
 769          pass 
 770      xmld = stream.read() 
 771      if archive_directory: 
 772          base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2])) 
 773          counter = 1 
 774          dest_file = os.path.join(archive_directory, base_name) 
 775          while os.path.isfile(dest_file): 
 776              dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter)) 
 777              counter += 1 
 778          try: 
 779              OpenOrCreate(dest_file).write(xmld) 
 780          except OSError as e: 
 781              _log.warning('Unable to save %s in %s: %s', uri, dest_file, e) 
 782      return xmld 
  783   
 784 -def OpenOrCreate (file_name, tag=None, preserve_contents=False): 
  785      """Return a file object used to write binary data into the given file. 
 786   
 787      Use the C{tag} keyword to preserve the contents of existing files 
 788      that are not supposed to be overwritten. 
 789   
 790      To get a writable file but leaving any existing contents in place, 
 791      set the C{preserve_contents} keyword to C{True}.  Normally, existing file 
 792      contents are erased. 
 793   
 794      The returned file pointer is positioned at the end of the file. 
 795   
 796      @keyword tag: If not C{None} and the file already exists, absence 
 797      of the given value in the first 4096 bytes of the file (decoded as 
 798      UTF-8) causes an C{IOError} to be raised with C{errno} set to 
 799      C{EEXIST}.  I.e., only files with this value in the first 4KB will 
 800      be returned for writing. 
 801   
 802      @keyword preserve_contents: This value controls whether existing 
 803      contents of the file will be erased (C{False}, default) or left in 
 804      place (C{True}). 
 805      """ 
 806      (path, leaf) = os.path.split(file_name) 
 807      if path: 
 808          try: 
 809              os.makedirs(path) 
 810          except Exception as e: 
 811              if not (isinstance(e, (OSError, IOError)) and (errno.EEXIST == e.errno)): 
 812                  raise 
 813      fp = open(file_name, 'ab+') 
 814      if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size): 
 815          fp.seek(0)  
 816          blockd = fp.read(4096) 
 817          blockt = blockd.decode('utf-8') 
 818          if 0 > blockt.find(tag): 
 819              raise OSError(errno.EEXIST, os.strerror(errno.EEXIST)) 
 820      if not preserve_contents: 
 821          fp.seek(0)  
 822          fp.truncate() 
 823      else: 
 824          fp.seek(2)  
 825      return fp 
  826   
 827   
 828  __Hasher = None 
 829  try: 
 830      import hashlib 
 831      __Hasher = hashlib.sha1 
 832  except ImportError: 
 833      import sha 
 834      __Hasher = sha.new 
 835   
 836 -def HashForText (text): 
  837      """Calculate a cryptographic hash of the given string. 
 838   
 839      For example, this is used to verify that a given module file 
 840      contains bindings from a previous generation run for the same 
 841      namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the 
 842      hash is calculated on the UTF-8 encoding of the text. 
 843   
 844      @return: A C{str}, generally a sequence of hexadecimal "digit"s. 
 845      """ 
 846      if isinstance(text, six.text_type): 
 847          text = text.encode('utf-8') 
 848      return __Hasher(text).hexdigest() 
  849   
 850   
 851  __HaveUUID = False 
 852  try: 
 853      import uuid 
 854      __HaveUUID = True 
 855  except ImportError: 
 856      import random 
 858      """Obtain a UUID using the best available method.  On a version of 
 859      python that does not incorporate the C{uuid} class, this creates a 
 860      string combining the current date and time (to the second) with a 
 861      random number. 
 862   
 863      @rtype: C{str} 
 864      """ 
 865      if __HaveUUID: 
 866          return uuid.uuid1().urn 
 867      return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF)) 
  868   
 870      """Records a unique identifier, generally associated with a 
 871      binding generation action. 
 872   
 873      The identifier is a string, but gets wrapped in an instance of 
 874      this class to optimize comparisons and reduce memory footprint. 
 875   
 876      Invoking the constructor for this class on the same string 
 877      multiple times will return the same Python object. 
 878   
 879      An instance of this class compares equal to, and hashes equivalent 
 880      to, the uid string.  When C{str}'d, the result is the uid; when 
 881      C{repr}'d, the result is a constructor call to 
 882      C{pyxb.utils.utility.UniqueIdentifier}. 
 883      """ 
 884   
 885       
 886      __ExistingUIDs = {} 
 887   
 889          """The string unique identifier""" 
 890          return self.__uid 
  891      __uid = None 
 892   
 893       
 896   
 899   
 902   
 903       
 919   
        """Associate the given object with this identifier.
 922   
 923          This is a one-way association: the object is not provided with 
 924          a return path to this identifier instance.""" 
 925          self.__associatedObjects.add(obj) 
  927          """The set of objects that have been associated with this 
 928          identifier instance.""" 
 929          return self.__associatedObjects 
  930      __associatedObjects = None 
 931   
 933          """Create a new UniqueIdentifier instance. 
 934   
 935          @param uid: The unique identifier string.  If present, it is 
        the caller's responsibility to ensure the value is universally
 937          unique.  If C{None}, one will be provided. 
 938          @type uid: C{str} or C{unicode} 
 939          """ 
 940          assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid()) 
 941          self.__associatedObjects = set() 
  942   
 944          if other is None: 
 945              return False 
 946          elif isinstance(other, UniqueIdentifier): 
 947              other_uid = other.uid() 
 948          elif isinstance(other, six.string_types): 
 949              other_uid = other 
 950          else: 
 951              raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),)) 
 952          return self.uid() == other_uid 
  953   
 955          return hash(self.uid()) 
  956   
 959   
 961          return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),) 
   962   
 965      """A C{datetime.tzinfo} subclass that helps deal with UTC 
 966      conversions in an ISO8601 world. 
 967   
 968      This class only supports fixed offsets from UTC. 
 969      """ 
 970   
 971       
 972      __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$') 
 973   
 974       
 975      __utcOffset_min = 0 
 976   
 977       
 978      __utcOffset_td = None 
 979   
 980       
 981      __ZeroDuration = datetime.timedelta(0) 
 982   
 983       
 984      __MaxOffset_td = datetime.timedelta(hours=14) 
 985   
 987          """Create a time zone instance with a fixed offset from UTC. 
 988   
 989          @param spec: Specifies the offset.  Can be an integer counting 
 990          minutes east of UTC, the value C{None} (equal to 0 minutes 
        east), or a string that conforms to the ISO8601 time zone
 992          sequence (B{Z}, or B{[+-]HH:MM}). 
 993          """ 
 994   
 995          if spec is not None: 
 996              if isinstance(spec, six.string_types): 
 997                  if 'Z' == spec: 
 998                      self.__utcOffset_min = 0 
 999                  else: 
1000                      match = self.__Lexical_re.match(spec) 
1001                      if match is None: 
1002                          raise ValueError('Bad time zone: %s' % (spec,)) 
1003                      self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3)) 
1004                      if '-' == match.group(1): 
1005                          self.__utcOffset_min = - self.__utcOffset_min 
1006              elif isinstance(spec, int): 
1007                  self.__utcOffset_min = spec 
1008              elif isinstance(spec, datetime.timedelta): 
1009                  self.__utcOffset_min = spec.seconds // 60 
1010              else: 
1011                  raise TypeError('%s: unexpected type %s' % (type(self), type(spec))) 
1012          self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min) 
1013          if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td: 
1014              raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td)) 
1015          if 0 == self.__utcOffset_min: 
1016              self.__tzName = 'Z' 
1017          elif 0 > self.__utcOffset_min: 
1018              self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60) 
1019          else: 
1020              self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60) 
 1021   
1023          """Returns the constant offset for this zone.""" 
1024          return self.__utcOffset_td 
 1025   
1027          """Return the name of the timezone in the format expected by XML Schema.""" 
1028          return self.__tzName 
 1029   
    def dst (self, dt):
        """Returns a constant zero duration.

        This class only supports fixed offsets from UTC, so there is
        never a daylight-saving adjustment to report.

        @param dt: Ignored; the result does not depend on it.
        @return: The class-level zero C{datetime.timedelta}.
        """
        return self.__ZeroDuration
 1033   
1038   
1041   
1044   
 1047   
1049      """A C{datetime.tzinfo} subclass for the local time zone. 
1050   
1051      Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1. 
1052      """ 
1053   
1054      __STDOffset = datetime.timedelta(seconds=-time.timezone) 
1055      __DSTOffset = __STDOffset 
1056      if time.daylight: 
1057          __DSTOffset = datetime.timedelta(seconds=-time.altzone) 
1058      __ZeroDelta = datetime.timedelta(0) 
1059      __DSTDelta = __DSTOffset - __STDOffset 
1060   
1065   
1066 -    def dst (self, dt): 
 1070   
1073   
1075          tt = (dt.year, dt.month, dt.day, 
1076                dt.hour, dt.minute, dt.second, 
1077                0, 0, -1) 
1078          tt = time.localtime(time.mktime(tt)) 
1079          return tt.tm_isdst > 0 
  1080   
1082      """Emulate the B{transient} keyword from Java for private member 
1083      variables. 
1084   
1085      This class defines a C{__getstate__} method which returns a copy 
1086      of C{self.__dict__} with certain members removed.  Specifically, 
1087      if a string "s" appears in a class member variable named 
1088      C{__PrivateTransient} defined in the "Class" class, then the 
1089      corresponding private variable "_Class__s" will be removed from 
1090      the state dictionary.  This is used to eliminate unnecessary 
1091      fields from instances placed in L{namespace 
1092      archives<pyxb.namespace.archive.NamespaceArchive>} without having 
1093      to implement a C{__getstate__} method in every class in the 
1094      instance hierarchy. 
1095   
1096      For an example, see 
1097      L{pyxb.xmlschema.structures._SchemaComponent_mixin} 
1098   
1099      If you use this, it is your responsibility to define the 
1100      C{__PrivateTransient} class variable and add to it the required 
1101      variable names. 
1102   
1103      Classes that inherit from this are free to define their own 
1104      C{__getstate__} method, which may or may not invoke the superclass 
1105      one.  If you do this, be sure that the class defining 
1106      C{__getstate__} lists L{PrivateTransient_mixin} as one of its 
1107      direct superclasses, lest the latter end up earlier in the mro and 
1108      consequently bypass the local override. 
1109      """ 
1110   
1111       
1112       
1113      __Attribute = '__PrivateTransient' 
1114   
1116          state = self.__dict__.copy() 
1117           
1118           
1119          attr = '_%s%s_' % (self.__class__.__name__, self.__Attribute) 
1120          skipped = getattr(self.__class__, attr, None) 
1121          if skipped is None: 
1122              skipped = set() 
1123              for cl in self.__class__.mro(): 
1124                  for (k, v) in six.iteritems(cl.__dict__): 
1125                      if k.endswith(self.__Attribute): 
1126                          cl2 = k[:-len(self.__Attribute)] 
1127                          skipped.update([ '%s__%s' % (cl2, _n) for _n in v ]) 
1128              setattr(self.__class__, attr, skipped) 
1129          for k in skipped: 
1130              if state.get(k) is not None: 
1131                  del state[k] 
1132           
1133           
1134           
1135           
1136           
1137           
1138           
1139           
1140           
1141   
1142          return state 
  1143   
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Collect the files found in a set of directories that meet
    certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile(r'.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    Returned paths are built by joining each (possibly substituted) path
    entry with the file names found beneath it; they are not otherwise
    normalized.

    @param path: The directories to search, separated by C{os.pathsep}
    (a colon on POSIX platforms, a semi-colon on Windows).  An entry
    that ends with C{//}, regardless of platform, has that suffix
    stripped and is scanned recursively.  An entry that names a regular
    file is itself a candidate, tested against C{pattern} using the
    whole entry.

    @keyword pattern: Optional regular expression object; only file
    names for which C{pattern.search} succeeds are returned.  If left
    as C{None}, every file is returned.

    @keyword default_path_wildcard: An optional string which, when it
    appears as a complete entry in the path, is replaced by the entries
    of C{default_path}.  Only the first occurrence is expanded; later
    occurrences are dropped.

    @keyword default_path: A system-defined path (entries separated by
    C{os.pathsep}) substituted for C{default_path_wildcard}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path entry, is removed and the remainder joined onto
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined path prefix combined
    with the user-provided entry to identify a file or subdirectory
    within an installation-specific area.

    @return: A list of matching file paths, in the order discovered.
    """
    def accepts (name):
        # No pattern means everything is wanted.
        return (pattern is None) or (pattern.search(name) is not None)

    found = []
    pending = path.split(os.pathsep)
    while pending:
        entry = pending.pop(0)
        if entry == default_path_wildcard:
            if default_path is not None:
                pending = default_path.split(os.pathsep) + pending
                # Expand the default only once, so a wildcard inside the
                # default path cannot recurse.
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if accepts(entry):
                found.append(entry)
            continue
        for (root, _subdirs, names) in os.walk(entry):
            found.extend(os.path.join(root, name) for name in names if accepts(name))
            if not recursive:
                # Only the top-level directory is of interest.
                break
    return found
 1216   
1217  @BackfillComparisons 
1218 -class Location (object): 
 1219      __locationBase = None 
1220      __lineNumber = None 
1221      __columnNumber = None 
1222   
1223 -    def __init__ (self, location_base=None, line_number=None, column_number=None): 
 1224          if isinstance(location_base, str): 
1225              location_base = six.moves.intern(location_base) 
1226          self.__locationBase = location_base 
1227          self.__lineNumber = line_number 
1228          self.__columnNumber = column_number 
 1229   
1230 -    def newLocation (self, locator=None, line_number=None, column_number=None): 
 1231          if locator is not None: 
1232              try: 
1233                  line_number = locator.getLineNumber() 
1234                  column_number = locator.getColumnNumber() 
1235              except: 
1236                  pass 
1237          return Location(self.__locationBase, line_number, column_number) 
 1238   
1239      locationBase = property(lambda _s: _s.__locationBase) 
1240      lineNumber = property(lambda _s: _s.__lineNumber) 
1241      columnNumber = property(lambda _s: _s.__columnNumber) 
1242   
1244          if v1 is None: 
1245              if v2 is None: 
1246                  return None 
1247              return 1 
1248          if v2 is None: 
1249              return -1 
1250          if v1 < v2: 
1251              return -1 
1252          if v1 == v2: 
1253              return 0 
1254          return 1 
 1255   
1263   
1266   
1268          """Comparison by locationBase, then lineNumber, then columnNumber.""" 
1269          if other is None: 
1270              return False 
1271          rv = self.__cmpTupleUnlessNone(self, other) 
1272          if rv is None: 
1273              return True 
1274          return 0 == rv 
 1275   
1277          if other is None: 
1278              return False 
1279          rv = self.__cmpTupleUnlessNone(self, other) 
1280          if rv is None: 
1281              return False 
1282          return -1 == rv 
 1283   
1292   
 1297   
1310   
1312      """Filtered built-in repr for python 2/3 compatibility in 
1313      generated bindings. 
1314   
1315      All generated string values are to be unicode.  We always import 
1316      unicode_literals from __future__, so we want plain quotes with no 
1317      prefix u.  Strip that off. 
1318   
1319      Integer constants should not have the suffix L even if they do not 
1320      fit in a Python2 int.  The references generated through this 
1321      function are never used for calculations, so the implicit cast to 
1322      a larger type is sufficient. 
1323   
1324      All other values use their standard representations. 
1325      """ 
1326      if isinstance(v, six.string_types): 
1327          qu = QuotedEscaped(v) 
1328          if 'u' == qu[0]: 
1329              return qu[1:] 
1330          return qu 
1331      if isinstance(v, six.integer_types): 
1332          vs = repr(v) 
1333          if vs.endswith('L'): 
1334              return vs[:-1] 
1335          return vs 
1336      return repr(v) 
 1337