pyxb.utils.utility

1 # -*- coding: utf-8 -*- 2 # Copyright 2009-2012, Peter A. Bigot 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); you may 5 # not use this file except in compliance with the License. You may obtain a 6 # copy of the License at: 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 # License for the specific language governing permissions and limitations 14 # under the License. 15 16 """Utility functions and classes.""" 17 18 import re 19 import os 20 import errno 21 import pyxb 22 import urlparse 23 import time 24 import datetime 25 import logging 26 27 _log = logging.getLogger(__name__) 28

def QuotedEscaped (s):
    """Produce Python source text for a literal equal to C{s}.

    The built-in C{repr} already yields a correctly quoted and escaped
    literal, so the work is simply delegated to it.

    @param s: the value to be rendered as a literal

    @rtype: C{str}
    """
    literal = repr(s)
    return literal

38

def _DefaultXMLIdentifierToPython (identifier):
    """Default implementation for _XMLIdentifierToPython

    For historical reasons, this converts the identifier to the
    interpreter's Unicode text type using the system default encoding.
    This should have no practical effect.

    The text type is obtained as C{type(u'')}.  Under Python 2 that is
    exactly the C{unicode} built-in; on interpreters where that built-in
    does not exist it is C{str}.

    @param identifier : some XML identifier

    @return: C{identifier} converted to the Unicode text type
    """
    text_type = type(u'')
    return text_type(identifier)

52

def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Install the callable L{MakeIdentifier} uses to pre-process an XML identifier.

    In Python3, identifiers can be full Unicode tokens, but in Python2,
    all identifiers must be ASCII characters.  L{MakeIdentifier} enforces
    this by removing all characters that are not valid within an
    identifier.

    An application generating bindings may be able to transliterate
    Unicode code points that are not valid Python identifier characters
    into something else.  The callable installed here gets the chance to
    do that before the invalid characters are stripped.

    The callable is only expected to replace whatever characters it
    wishes to.  Every transformation performed by L{MakeIdentifier} is
    still applied afterwards, so the final result is a legal identifier
    regardless of what the callable returns.

    @param xml_identifier_to_python : A callable that takes a string and
    returns a Unicode string, possibly with non-identifier characters
    replaced by other characters.  Pass C{None} to restore the default
    implementation, which is L{_DefaultXMLIdentifierToPython}.
    """
    global _XMLIdentifierToPython
    if xml_identifier_to_python is not None:
        _XMLIdentifierToPython = xml_identifier_to_python
    else:
        _XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Callable applied by MakeIdentifier before any other processing.  It is
# replaced through _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Patterns used by MakeIdentifier:
# - characters that are turned into an underscore
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# - characters that are dropped altogether
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# - a run of underscores at the start of the string
_PrefixUnderscore_re = re.compile(r'^_+')
# - a run of digits at the start of the string
_PrefixDigit_re = re.compile(r'^\d+')
# - an underscore together with the word character that follows it
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Derive a string usable as a Python identifier from C{s}.

    Processing order:

     - C{s} is passed through L{_XMLIdentifierToPython};
     - dashes, spaces, and periods become underscores;
     - characters other than ASCII letters, digits, and underscore are
       removed;
     - leading underscores are removed;
     - optional camel-casing is applied (see C{camel_case});
     - if the result starts with a digit, C{'n'} is prepended;
     - if the result is empty, C{'emptyString'} is used instead.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, each underscore that is
    immediately followed by a word character is dropped and that
    character is upper-cased, so 'one_or_two' becomes 'oneOrTwo'.  If
    C{False} (default), has no effect.

    @return: the identifier, as a string
    """
    text = _XMLIdentifierToPython(s)
    text = _UnderscoreSubstitute_re.sub('_', text)
    text = _NonIdentifier_re.sub('', text)
    text = _PrefixUnderscore_re.sub('', text)
    if camel_case:
        # group(0) is the underscore plus one character; keep only the
        # character, upper-cased.
        text = _CamelCase_re.sub(lambda hit: hit.group(0)[1].upper(), text)
    if _PrefixDigit_re.match(text):
        text = 'n' + text
    if not text:
        text = 'emptyString'
    return text

_PythonKeywords = frozenset( (
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield"
  ) )
"""Python keywords.  Note that types like int and float are not
keywords.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset( (
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
  ) )
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = _PythonKeywords | _PythonBuiltInConstants
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Return C{s}, with an underscore appended if it is reserved.

    A string is reserved when it appears in L{_Keywords} or in
    C{aux_keywords}.  Any other string is returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional container of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    is_reserved = (s in _Keywords) or (s in aux_keywords)
    if not is_reserved:
        return s
    return '%s_' % (s,)

162

def MakeUnique (s, in_use):
    """Return an identifier based on C{s} that is not already in C{in_use}.

    If C{s} itself is free it is returned as-is.  Otherwise trailing
    underscores are removed from C{s} and candidates are tried in the
    order C{x_}, C{x_2}, C{x_3}, ... until a free one is found.

    @param in_use: The set of identifiers already in use in the
    relevant scope.  The returned identifier is added to it.

    @rtype: C{str}
    """
    if s not in in_use:
        in_use.add(s)
        return s
    stem = s.rstrip('_')
    unique = '%s_' % (stem,)
    serial = 2
    while unique in in_use:
        unique = '%s_%d' % (stem, serial)
        serial += 1
    in_use.add(unique)
    return unique

187

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Run every step needed to turn C{s} into a unique identifier.

    The string is converted with L{MakeIdentifier}, leading and
    trailing underscores are stripped from that result, keyword
    conflicts are resolved with L{DeconflictKeyword}, the visibility
    prefix (if any) is added, and L{MakeUnique} picks the final name.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{True}, the identifier is given two leading
    underscores, making it a private variable within a Python class.
    If C{False} (default) and C{protected} is also false, the
    identifier has no leading underscore.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is true.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    bare = MakeIdentifier(s).strip('_')
    name = DeconflictKeyword(bare, aux_keywords)
    prefix = ''
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    return MakeUnique(prefix + name, in_use)

223 224 # @todo: descend from pyxb.cscRoot, if we import pyxb

class _DeconflictSymbols_mixin (object):
    """Mix-in that records the public symbols a class reserves, so that
    generated binding classes inheriting from it can avoid them.

    Classes such as L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} expose public functions
    and variables.  An XML schema may well contain tags or attribute
    names that match those symbols.  To avoid the conflict, the symbols
    reserved through this mix-in are added to the pre-defined
    identifier set.

    A subclass declares its reservations in a class-level variable
    C{_ReservedSymbols}: a set of strings naming the symbols reserved
    by that class, combined with those of any superclasses that also
    reserve symbols.  Code like the following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting runs of one or more spaces.  Replacing a
# lone space by a space is a no-op, so only longer runs are affected.
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Apply one of the XML Schema whitespace normalizations to C{text}.

    The caller is expected to set exactly one of the C{preserve},
    C{replace}, and C{collapse} keywords to C{True}.  If several are
    set, the first in that order wins.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: every tab, newline, and carriage return is replaced
       with an ASCII space.

     - C{collapse}: the C{replace} normalization is done, then each run
       of spaces is reduced to a single space and leading and trailing
       whitespace is stripped.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @raise Exception: none of the three keywords is true

    @rtype: C{str}
    """
    if preserve:
        return text
    spaced = __TabCRLF_re.sub(' ', text)
    if replace:
        return spaced
    if not collapse:
        # pyxb not imported here; could be.
        raise Exception('NormalizeWhitespace: No normalization specified')
    return __MultiSpace_re.sub(' ', spaced).strip()

289

class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes are kept in sets and used as dictionary keys, so they must be
    hashable.

    @note: The results of L{roots}, L{tarjan} (and the methods built on
    it) and L{dfsOrder} are cached.  Adding nodes or edges does not
    discard those caches; pass C{reset=True} to recompute.
    """

    # Class-level defaults.  They make every cache read as "not yet
    # computed" until the corresponding method stores a value on the
    # instance.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: If not C{None}, a node that is registered as a
        root of the graph, exactly as if it had been passed to
        L{addRoot}.
        """
        self.__roots = None
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()
        if root is not None:
            self.__roots = set([root])
            # Keep the node set consistent with addRoot: a root is a
            # node of the graph even before any edge mentions it.
            self.__nodes.add(root)

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop does not count as an incoming edge, so that a node
        # which only refers to itself can still be a root.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any node that had been registered manually
        using L{addRoot} (or the constructor) is no longer in the set
        unless it has no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The walk starts from each node returned by L{roots}; the results
        are made available through L{scc}, L{sccMap} and L{sccOrder}.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged.

        @raise Exception: the graph has nodes but no roots
        """

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        # An index of None marks a node that has not been visited yet.
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Unvisited: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__stack:
                # Still on the stack: target belongs to the component
                # currently being assembled.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            # Otherwise target belongs to a component that is already
            # complete; nothing to do.

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the first-visited node of its component: everything
            # above it on the stack, and v itself, forms the component.
            scc = []
            while True:
                scc.append(self.__stack.pop())
                if v == scc[-1]:
                    break
            self.__sccOrder.append(scc)
            # Only genuine loops (more than one node) are recorded in
            # the component list and the node-to-component map.
            if 1 < len(scc):
                self.__scc.append(scc)
                for _v in scc:
                    self.__sccMap.setdefault(_v, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph.

        The data structure is a list, each element of which is itself a
        list of nodes from the graph.  Only components with more than
        one node are included; see L{sccOrder} for a result that also
        covers single nodes.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        Only nodes that belong to a component with more than one node
        appear in the map.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a component earlier in the list indicates that it
        has no dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The component (a list of nodes), or C{None} if the
        node is not present in L{sccMap}."""

        return self.sccMap(**kw).get(node, None)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        The value is calculated as the number of edges, minus the
        number of nodes, plus twice the number of components returned
        by L{scc}."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        # Post-order walk: a node is appended after everything reachable
        # from it.  Callers must not pass a node that was already walked.
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a description of the graph in the DOT language.

        Nodes are numbered from 1 and drawn as boxes; self-loops are
        omitted.

        @keyword title: the name given to the digraph
        @keyword labeller: optional callable producing the label for a
        node; C{str} is used when it is C{None}.
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list in which every node appears after
        the nodes reachable from it.  Calculated lists are retained and
        returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results (including
        the cached roots) and recompute the order.

        @raise Exception: some node cannot be reached from any root
        """
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                # A root registered through addRoot may have incoming
                # edges, so an earlier root's walk may already have
                # covered it.
                if root not in self.__dfsWalked:
                    self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

# Prefix rewrites applied by NormalizeLocation when the caller supplies no
# map of its own.  The dictionary is updated in place, never rebound.
LocationPrefixRewriteMap_ = dict()

def SetLocationPrefixRewriteMap (prefix_map):
    """Replace the contents of the map that L{NormalizeLocation} uses
    by default to rewrite URI prefixes.

    @param prefix_map: a mapping from URI prefix to its replacement.
    Its entries are copied; the previous entries are discarded.
    """
    active = LocationPrefixRewriteMap_
    active.clear()
    active.update(prefix_map)

571

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    The steps are, in order:

     - if C{parent_uri} is given, C{uri} is joined to it to obtain an
       absolute URI;
     - each prefix in the rewrite map that the URI starts with is
       replaced by its substitute, e.g. to convert a remote URI to a
       local repository;
     - if the result contains no colon it is taken to be a file system
       path and is converted to a canonical path.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    """
    if uri is None:
        return uri
    abs_uri = uri
    if parent_uri is not None:
        abs_uri = urlparse.urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    for (pfx, sub) in prefix_map.items():
        if abs_uri.startswith(pfx):
            abs_uri = sub + abs_uri[len(pfx):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

612 613

def TextFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as a text string.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    @param uri: the location to read.

    @keyword archive_directory: if set, and the content was not read
    from a local file, a copy of the content is saved in this
    directory under the base name of the URI path (with a numeric
    suffix if that name is taken).  Failure to save the copy is logged
    as a warning and otherwise ignored.

    @return: the content that was read

    @raise Exception: whatever error prevented the uri from being
    opened; it is logged before being raised.
    """
    import urllib
    import urllib2
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urllib2.urlopen(uri)
        except Exception as e:
            exc = e
        if stream is None:
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the exception from urllib2; it is the one that
                # gets reported if nothing else works.
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri)
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    try:
        # Content that comes from a local file is not archived.  Protect
        # this in case whatever stream is doesn't have an fp attribute.
        if isinstance(stream, file) or isinstance(stream.fp, file):
            archive_directory = None
    except Exception:
        pass
    try:
        xmls = stream.read()
    finally:
        # Release the descriptor or connection promptly.
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        # Never overwrite an earlier archive entry: add a numeric suffix.
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive = OpenOrCreate(dest_file)
            try:
                archive.write(xmls)
            finally:
                archive.close()
        except (OSError, IOError) as e:
            # Archiving is best-effort.  Opening or writing a file
            # reports IOError, directory creation reports OSError.
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmls

669

def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write the given file.

    Any missing directories on the path to the file are created.  The
    file is opened in binary append/update mode (C{'ab+'}).

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @keyword tag: If not C{None} and the file already exists and is not
    empty, absence of the given value in the first 4096 bytes of the
    file causes an C{OSError} to be raised with C{errno} set to
    C{EEXIST}.  I.e., only files with this value in the first 4KB will
    be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: the tag check failed, or a directory could not be
    created.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            # Where an append-mode stream starts reading is
            # platform-dependent, so rewind explicitly.
            fp.seek(0)
            text = fp.read(4096)
            if 0 > text.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0)
            fp.truncate()
        else:
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the descriptor when the file is rejected.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    The hash is SHA-1, taken from C{hashlib} when that module is
    available and from the older C{sha} module otherwise.

    @param text: the text to hash

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    # type(u'') is the unicode built-in under Python 2, and str where
    # that built-in does not exist.
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()

# uuid didn't show up until 2.5
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import random

def _NewUUIDString ():
    """Obtain a UUID using the best available method.  On a version of
    python that does not incorporate the C{uuid} class, this creates a
    string combining the current date and time (to the second) with a
    random number.

    @rtype: C{str}
    """
    if __HaveUUID:
        return uuid.uuid1().urn
    # Fallback: a timestamp, a colon, and a 32-bit random number in
    # hexadecimal.  The bound needs no long-literal suffix: the value is
    # the same, and integers are promoted automatically where needed.
    return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))

752

class UniqueIdentifier (object):
    """Records a unique identifier, generally associated with a
    binding generation action.

    The identifier is a string, but gets wrapped in an instance of
    this class to optimize comparisons and reduce memory footprint.

    Invoking the constructor for this class on the same string
    multiple times will return the same Python object, including the
    objects that were associated with it.

    An instance of this class compares equal to, and hashes equivalent
    to, the uid string.  When C{str}'d, the result is the uid; when
    C{repr}'d, the result is a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier}.
    """

    # A map from UID string to the instance that represents it
    __ExistingUIDs = {}

    # The types accepted as a uid: (str, unicode) under Python 2, which
    # is what the basestring built-in covers.
    __StringTypes = (type(''), type(u''))

    # Class-level defaults, replaced per instance in __new__
    __uid = None
    __associatedObjects = None

    def uid (self):
        """The string unique identifier"""
        return self.__uid

    # Support pickling, which is done using only the UID.
    def __getnewargs__ (self):
        return (self.__uid,)

    def __getstate__ (self):
        return self.__uid

    def __setstate__ (self, state):
        # __new__ already stored the uid; the state merely repeats it.
        assert self.__uid == state

    # Singleton-like
    def __new__ (cls, *args, **kw):
        if 0 < len(args):
            uid = args[0]
        else:
            uid = kw.get('uid')
        if uid is None:
            uid = _NewUUIDString()
        if isinstance(uid, UniqueIdentifier):
            uid = uid.uid()
        if not isinstance(uid, cls.__StringTypes):
            raise TypeError('UniqueIdentifier uid must be a string')
        rv = cls.__ExistingUIDs.get(uid)
        if rv is None:
            rv = super(UniqueIdentifier, cls).__new__(cls)
            rv.__uid = uid
            # Created here rather than in __init__, which also runs each
            # time an existing instance is handed out again, and which
            # is not run at all when unpickling.
            rv.__associatedObjects = set()
            cls.__ExistingUIDs[uid] = rv
        return rv

    def associateObject (self, obj):
        """Associate the given object with this identifier.

        This is a one-way association: the object is not provided with
        a return path to this identifier instance."""
        self.__associatedObjects.add(obj)

    def associatedObjects (self):
        """The set of objects that have been associated with this
        identifier instance."""
        return self.__associatedObjects

    def __init__ (self, uid=None):
        """Create a new UniqueIdentifier instance.

        @param uid: The unique identifier string.  If present, it is
        the callers responsibility to ensure the value is universally
        unique.  If C{None}, one will be provided.
        @type uid: C{str} or C{unicode}
        """
        assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
        # Only fill in a missing set.  Existing associations must
        # survive a repeated constructor call for the same uid.
        if self.__associatedObjects is None:
            self.__associatedObjects = set()

    def __eq__ (self, other):
        if other is None:
            return False
        elif isinstance(other, UniqueIdentifier):
            other_uid = other.uid()
        elif isinstance(other, self.__StringTypes):
            other_uid = other
        else:
            raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
        return self.uid() == other_uid

    def __ne__ (self, other):
        # Python 2 does not derive != from ==.  Without this method an
        # identifier would test as different from its own uid string.
        return not self.__eq__(other)

    def __hash__ (self):
        return hash(self.uid())

    def __str__ (self):
        return self.uid()

    def __repr__ (self):
        return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)

846

class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The types accepted as a lexical specification: (str, unicode) under
    # Python 2, which is what the basestring built-in covers.
    __StringTypes = (type(''), type(u''))

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    # Range limits
    __MaxOffset_td = datetime.timedelta(hours=14)

    def __init__ (self, spec=None):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, the value C{None} (equal to 0 minutes
        east), a string that conforms to the ISO8601 time zone
        sequence (B{Z}, or B{[+-]HH:MM}), or a C{datetime.timedelta}
        (positive for east of UTC; reduced to whole minutes, rounding
        toward negative infinity).

        @raise ValueError: the string is not a valid time zone, or the
        offset is more than 14 hours from UTC.

        @raise TypeError: C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, self.__StringTypes):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, datetime.timedelta):
                # A timedelta keeps its sign in the days field: minus
                # five hours is days=-1, seconds=68400.  Both fields are
                # needed to get the offset right.
                self.__utcOffset_min = (spec.days * 86400 + spec.seconds) // 60
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
            raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in the format expected by XML Schema."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

    def __cmp__ (self, other):
        """Order time zones by their offset from UTC.

        @return: a negative, zero, or positive integer when this zone
        is respectively west of, at, or east of C{other}.
        """
        if isinstance(other, UTCOffsetTimeZone):
            mine = self.__utcOffset_min
            theirs = other.__utcOffset_min
        else:
            # Some other tzinfo: it reports a duration, so compare
            # against this zone's duration, not its minute count.
            mine = self.__utcOffset_td
            theirs = other.utcoffset(datetime.datetime.now())
        return (mine > theirs) - (mine < theirs)

921 922

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass for the local time zone.

    Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.

    The offsets are read from the C{time} module once, when the class
    is defined.
    """

    __ZeroDelta = datetime.timedelta(0)
    # Offset from UTC while standard time is in effect
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Offset from UTC while daylight saving time is in effect; the same
    # as standard time where no DST zone is defined
    if time.daylight:
        __DSTOffset = datetime.timedelta(seconds=-time.altzone)
    else:
        __DSTOffset = __STDOffset
    # Amount by which daylight saving time shifts the clock
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the local offset from UTC in effect at C{dt}."""
        if not self.__isDST(dt):
            return self.__STDOffset
        return self.__DSTOffset

    def dst (self, dt):
        """Return the daylight saving adjustment in effect at C{dt}."""
        if not self.__isDST(dt):
            return self.__ZeroDelta
        return self.__DSTDelta

    def tzname (self, dt):
        """Return the local time zone name in effect at C{dt}."""
        in_dst = self.__isDST(dt)
        return time.tzname[in_dst]

    def __isDST (self, dt):
        # Round-trip the wall-clock fields through mktime/localtime and
        # read back the DST flag.  The final -1 passes "unknown" as the
        # DST flag on input.
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        resolved = time.localtime(time.mktime(fields))
        return resolved.tm_isdst > 0

955

class PrivateTransient_mixin (pyxb.cscRoot):
    """Mix-in giving private member variables something like Java's
    B{transient} keyword.

    The C{__getstate__} defined here hands back a copy of
    C{self.__dict__} from which selected private members have been
    dropped.  A class "Class" selects members by listing their names in
    a class variable named C{__PrivateTransient}: for every string "s"
    found there, the private variable "_Class__s" is left out of the
    state.  This keeps unnecessary fields out of instances stored in
    L{namespace archives<pyxb.namespace.archive.NamespaceArchive>}
    without a hand-written C{__getstate__} in every class of the
    instance hierarchy.

    L{pyxb.xmlschema.structures._SchemaComponent_mixin} shows the
    technique in use.

    Defining C{__PrivateTransient} and filling it with the required
    variable names is the responsibility of the class using this mix-in.

    A subclass may supply its own C{__getstate__}, invoking this one or
    not as it sees fit.  The class doing so should list
    L{PrivateTransient_mixin} among its direct superclasses; otherwise
    the mix-in may end up earlier in the mro and the local override
    would be bypassed.
    """

    # Suffix shared by the per-class variables naming transient members and
    # by the class variable in which the aggregated names are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the members
        declared transient anywhere in the class hierarchy.

        The aggregated set of mangled member names is computed on first
        use and cached on the instance's class.  A transient member
        whose current value is C{None} is left in the returned state.
        """
        cls = self.__class__
        suffix = self.__Attribute
        # The cache lives in a class variable whose name differs slightly
        # (trailing underscore) from that of the class-level declarations.
        cache_name = '_%s%s_' % (cls.__name__, suffix)
        transient = getattr(cls, cache_name, None)
        if transient is None:
            transient = set()
            for klass in cls.mro():
                for (member, names) in klass.__dict__.items():
                    if not member.endswith(suffix):
                        continue
                    # What precedes the suffix is the mangling prefix
                    # "_Class" of the declaring class.
                    owner = member[:-len(suffix)]
                    for name in names:
                        transient.add('%s__%s' % (owner, name))
            setattr(cls, cache_name, transient)
        state = self.__dict__.copy()
        for key in transient:
            if state.get(key) is not None:
                del state[key]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in state.items():
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.resolution.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))
        return state

1018

def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile(r'.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    Returned paths are built from the path entries as given; they are
    absolute only if the entries (after substitution) are.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry naming a regular file is returned itself if
    it matches C{pattern}.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.  It is
    applied with C{search} to the bare file name for files found in a
    directory, and to the whole entry when the entry is itself a file.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.  Only the first occurrence is expanded.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.  Separators directly following the prefix
    are ignored, so C{&bundles} and C{&/bundles} name the same
    directory and C{&//} scans the substituend itself recursively.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.  Must be supplied whenever C{prefix_pattern} is.

    @return: the matching paths, in the order the path entries were
    visited
    @rtype: C{list}
    """
    matching_files = []
    separators = os.sep + (os.altsep or '')
    pending = path.split(os.pathsep)
    while pending:
        entry = pending.pop(0)
        if default_path_wildcard == entry:
            if default_path is not None:
                pending[0:0] = default_path.split(os.pathsep)
                # Expand the default only once, so a wildcard inside the
                # default path cannot recurse forever.
                default_path = None
            continue
        # Detect the recursion marker before prefix substitution so that it
        # is honoured even when nothing but the prefix precedes it.
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            # os.path.join discards every earlier component when a later one
            # is absolute; drop leading separators so the substituend is
            # never silently thrown away.
            remainder = entry[len(prefix_pattern):].lstrip(separators)
            entry = os.path.join(prefix_substituend, remainder)
        if os.path.isfile(entry):
            if (pattern is None) or (pattern.search(entry) is not None):
                matching_files.append(entry)
            continue
        for (root, _dirs, files) in os.walk(entry):
            matching_files.extend(os.path.join(root, f) for f in files
                                  if (pattern is None) or (pattern.search(f) is not None))
            if not recursive:
                # Only the top-level directory is wanted.
                break
    return matching_files

1091

class Location (object):
    """A position within some source, recorded as a base identifier
    plus optional line and column numbers."""

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @param location_base: identifies the source the position refers
        to.  A C{str} value is interned, since many locations are
        expected to share one base.
        @param line_number: line within the source, or C{None}
        @param column_number: column within the line, or C{None}
        """
        if isinstance(location_base, str):
            try:
                location_base = intern(location_base)
            except NameError:
                # Python 3 moved the builtin into the sys module.
                import sys
                location_base = sys.intern(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create another location that shares this one's base.

        @param locator: optional object providing C{getLineNumber()}
        and C{getColumnNumber()} (for example a SAX locator).  When
        given, its values take precedence over the explicit arguments.
        Errors raised while querying it are ignored and whatever values
        were obtained so far are used.
        @param line_number: line to record when no locator is usable
        @param column_number: column to record when no locator is usable
        @rtype: L{Location}
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Position information is best-effort; interpreter-level
                # exits such as KeyboardInterrupt are deliberately not
                # swallowed.
                pass
        return Location(self.__locationBase, line_number, column_number)

    # Read-only views of the values given at construction.
    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __str__ (self):
        """Render as C{base[line:column]}, or a fixed placeholder when
        no base is known."""
        if self.locationBase is None:
            return '<unknownLocation>'
        return '%s[%s:%s]' % (self.locationBase, self.lineNumber, self.columnNumber)

    def __repr__ (self):
        """Render as a constructor call qualified with the module name."""
        t = type(self)
        ctor = '%s.%s' % (t.__module__, t.__name__)
        return '%s(%r, %r, %r)' % (ctor, self.__locationBase, self.__lineNumber, self.__columnNumber)

1126

class Locatable_mixin (pyxb.cscRoot):
    """Mix-in that lets an object carry a location value (presumably a
    L{Location}; whatever is supplied is stored unchanged)."""

    __location = None

    def __init__ (self, *args, **kw):
        """Consume the optional C{location} keyword and forward all
        remaining arguments to the next class in the mro.

        @keyword location: value to record; C{None} when absent
        """
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the recorded location with C{location}."""
        self.__location = location

    def _location (self):
        """Return the recorded location, or C{None} if none was set."""
        recorded = self.__location
        return recorded

1139

Source Code for Module pyxb.utils.utility