pyxb.utils.utility

1 # Copyright 2009, Peter A. Bigot 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); you may 4 # not use this file except in compliance with the License. You may obtain a 5 # copy of the License at: 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 # License for the specific language governing permissions and limitations 13 # under the License. 14 15 """Utility functions and classes.""" 16 17 import re 18 import os 19 import errno 20 import pyxb 21

def QuotedEscaped (s):
    """Render a value as a literal that can be pasted into Python source.

    The built-in C{repr} already performs the quoting and escaping that
    is needed, so the work is simply delegated to it.

    @param s: the value (normally a string) to be rendered
    @rtype: C{str}
    """
    literal = repr(s)
    return literal

# Patterns used by MakeIdentifier, listed in the order they are applied.

# A hyphen, space, or period; each occurrence is replaced by an underscore.
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Any character other than an ASCII letter, a digit, or an underscore.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# One or more underscores at the start of the string.
_PrefixUnderscore_re = re.compile(r'^_+')
# One or more digits at the start of the string.
_PrefixDigit_re = re.compile(r'^\d+')
# An underscore together with the single word character following it.
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Derive a string usable as a Python identifier from C{s}.

    The steps, in order:

     - C{s} is converted to unicode;
     - hyphens, spaces, and periods become underscores;
     - every character that is not an ASCII letter, digit, or
       underscore is dropped;
     - leading underscores are removed;
     - optionally, camel-casing is applied (see C{camel_case});
     - if the result starts with a digit, the character 'n' is
       prepended;
     - if the result is empty, 'emptyString' is used instead.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, each underscore that is
    immediately followed by a word character is replaced by the
    upper-cased version of that character, so 'one_or_two' becomes
    'oneOrTwo'.  If C{False} (default), has no effect.

    @rtype: C{str} or C{unicode}
    """
    text = unicode(s)
    text = _UnderscoreSubstitute_re.sub('_', text)
    text = _NonIdentifier_re.sub('', text)
    text = _PrefixUnderscore_re.sub('', text)
    if camel_case:
        # group(0) is the underscore plus the following character;
        # keep only that character, upper-cased.
        text = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), text)
    if _PrefixDigit_re.match(text):
        text = 'n' + text
    if not text:
        text = 'emptyString'
    return text

_PythonKeywords = frozenset([
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield",
    ])
"""The reserved words of the Python language.  Type names such as int
and float are not keywords and so do not appear here.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset([
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    ])
"""Built-in constants: not keywords, but still unusable as identifiers.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = _PythonKeywords | _PythonBuiltInConstants
"""Every name reserved for Python: the union of L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Make C{s} safe to use where a Python keyword would be illegal.

    When C{s} is one of the reserved Python names, or appears in
    C{aux_keywords}, an underscore is appended; otherwise C{s} is
    returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional collection of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    reserved = (s in _Keywords) or (s in aux_keywords)
    if not reserved:
        return s
    return '%s_' % (s,)

105

def MakeUnique (s, in_use):
    """Produce an identifier derived from C{s} that is absent from C{in_use}.

    If C{s} itself is free it is used as-is.  Otherwise trailing
    underscores are removed from C{s} and candidates are tried in the
    order C{x_}, C{x_2}, C{x_3}, ... until a free one is found.

    @param in_use: The set of identifiers already in use in the
    relevant scope.  The identifier that is returned is added to this
    set before the function returns.

    @rtype: C{str}
    """
    if s not in in_use:
        in_use.add(s)
        return s
    stem = s.rstrip('_')
    unique = '%s_' % (stem,)
    serial = 2
    while unique in in_use:
        unique = '%s_%d' % (stem, serial)
        serial += 1
    in_use.add(unique)
    return unique

130

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Run the full pipeline that turns C{s} into a unique identifier.

    The string is passed through L{MakeIdentifier}, stripped of
    leading and trailing underscores, passed through
    L{DeconflictKeyword}, given the requested underscore prefix, and
    finally made unique with L{MakeUnique}.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), no underscores are
    prepended.  If C{True}, the returned identifier has two leading
    underscores, making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is C{True}.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    base = MakeIdentifier(s).strip('_')
    name = DeconflictKeyword(base, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + name, in_use)

166 167 # @todo: descend from pyxb.cscRoot, if we import pyxb

class _DeconflictSymbols_mixin (object):
    """Mix-in used to deconflict public symbols in classes that may be
    inherited by generated binding classes.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  It is possible that an XML schema might
    include tags and attribute names that match these symbols.  To
    avoid conflict, the reserved symbols marked in this class are
    added to the pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    # The mix-in itself reserves nothing; this empty set is the value
    # seen by any subclass that does not define its own.
    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting runs of one or more spaces (replacing a
# single space by a single space is harmless)
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Normalize the given string.

    Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
    parameters must be assigned the value C{True} by the caller.  If
    more than one is set, the first in that order wins.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: all tabs, newlines, and carriage returns are
     replaced with ASCII spaces.

     - C{collapse}: the C{replace} normalization is done, then
     sequences of spaces are replaced by a single space and leading
     and trailing spaces are removed.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @raise Exception: if none of the three keywords is true.

    @rtype: C{str}
    """
    if preserve:
        return text
    text = __TabCRLF_re.sub(' ', text)
    if replace:
        return text
    if collapse:
        # Strip only the space character: a bare strip() would also
        # remove other whitespace (e.g. U+00A0 in unicode text) that
        # the whiteSpace facet requires us to keep.
        return __MultiSpace_re.sub(' ', text).strip(' ')
    # pyxb not imported here; could be.
    raise Exception('NormalizeWhitespace: No normalization specified')

232

class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes are stored in sets and used as dictionary keys, so they
    must be hashable.
    """

    # Class-level defaults for the cached results; C{None} means the
    # corresponding value has not been computed yet.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: If not C{None}, a node that is added to the
        graph and recorded as a root, as if by L{addRoot}.
        """
        self.__roots = None
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()
        if root is not None:
            self.__roots = set([root])
            # Keep the node set consistent with the root set, exactly
            # as addRoot does; otherwise dfsOrder sees a walked node
            # that is not a member of the graph.
            self.__nodes.add(root)

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop does not count as an incoming edge when roots
        # are calculated.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any root that was supplied to the
        constructor or added using L{addRoot} and that has incoming
        edges will no longer be in the set.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The walk starts from each node returned by L{roots}; the
        cached root set is used as-is, even on reset.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged.

        @raise Exception: if the graph has nodes but no roots."""

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        # Mirrors the content of __stack so that membership tests do
        # not have to scan the list.
        self.__onStack = set()
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        self.__onStack.add(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Not visited yet: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__onStack:
                # Visited and still open: part of the current component.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            # Otherwise the target belongs to a component that has
            # already been completed; nothing to do.

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the first-visited node of its component: everything
            # above it on the stack belongs to the same component.
            scc = []
            while True:
                member = self.__stack.pop()
                self.__onStack.discard(member)
                scc.append(member)
                if v == member:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph.

        The data structure is a list, each element of which is itself
        a list of the nodes in one component.  Only components with
        more than one node are included; see L{sccOrder} for the
        complete sequence.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a set earlier in the list indicates that it has
        no dependencies on any node that appears in a subsequent set.
        This order is preferred over L{dfsOrder} for code generation,
        since it detects loops.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node, None)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        The value is computed as C{E - N + 2 * C}, where C{E} is the
        number of edges, C{N} the number of nodes, and C{C} the number
        of components returned by L{scc}."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        """Append C{source} to the DFS order after everything reachable from it."""
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a description of the graph in the DOT language.

        @keyword title: the name given to the digraph.

        @keyword labeller: optional callable that maps a node to its
        label text; C{str} is used if it is C{None}.

        Self-loops are not emitted."""
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and recompute the order.

        @raise Exception: if the walk from the roots does not reach
        every node of the graph."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                # A manually added root may already have been reached
                # from another root.
                if root not in self.__dfsWalked:
                    self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                walked = len(self.__dfsOrder)
                # Do not leave a partial order behind to be returned
                # as if it were valid by the next call.
                self.__dfsOrder = None
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (walked, len(self.__nodes)))
        return self.__dfsOrder

# Default prefix rewrites applied by NormalizeLocation; the same
# dictionary object is kept for the life of the module and is only
# ever modified in place.
LocationPrefixRewriteMap_ = dict()

def SetLocationPrefixRewriteMap (prefix_map):
    """Replace the URI prefix rewrites that L{NormalizeLocation} uses by default.

    The previous content of the module-level map is discarded and the
    entries of C{prefix_map} are copied into it.

    @param prefix_map: a mapping from URI prefix to its replacement.
    """
    rewrites = LocationPrefixRewriteMap_
    rewrites.clear()
    rewrites.update(prefix_map)

518

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI the way C{schemaLocation} attribute values are
    normalized, optionally resolving it against a parent URI.

    Processing happens in three stages:

     1. If C{parent_uri} is given, C{uri} is joined to it.
     2. Every prefix in the rewrite map that the current value starts
        with is replaced by its substitute, e.g. to redirect a remote
        URI to a local repository.
     3. If the result contains no colon it is taken to be a file
        system path and is converted to a real path.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.
    """
    import urlparse
    import os

    if uri is None:
        return uri
    abs_uri = uri
    if parent_uri is not None:
        abs_uri = urlparse.urljoin(parent_uri, uri)
    rewrites = prefix_map
    if rewrites is None:
        rewrites = LocationPrefixRewriteMap_
    for (pfx, sub) in rewrites.items():
        if abs_uri.startswith(pfx):
            abs_uri = sub + abs_uri[len(pfx):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

562 563 import urlparse 564

565 -def TextFromURI (uri, archive_directory=None):

566 """Retrieve the contents of the uri as a text string. 567 568 If the uri does not include a scheme (e.g., C{http:}), it is 569 assumed to be a file path on the local system.""" 570 import urllib 571 import urllib2 572 stream = None 573 exc = None 574 # Only something that has a colon is a non-file URI. Some things 575 # that have a colon are a file URI (sans schema). Prefer urllib2, 576 # but allow urllib (which apparently works better on Windows). 577 if 0 <= uri.find(':'): 578 try: 579 stream = urllib2.urlopen(uri) 580 except Exception, e: 581 exc = e 582 if stream is None: 583 try: 584 stream = urllib.urlopen(uri) 585 exc = None 586 except: 587 # Prefer urllib exception 588 pass 589 if stream is None: 590 # No go as URI; give file a chance 591 try: 592 stream = file(uri) 593 exc = None 594 except Exception, e: 595 if exc is None: 596 exc = e 597 if exc is not None: 598 print 'TextFromURI: open %s caught: %s' % (uri, exc) 599 raise exc 600 try: 601 # Protect this in case whatever stream is doesn't have an fp 602 # attribute. 603 if isinstance(stream, file) or isinstance(stream.fp, file): 604 archive_directory = None 605 except: 606 pass 607 xmls = stream.read() 608 if archive_directory: 609 base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2])) 610 counter = 1 611 dest_file = os.path.join(archive_directory, base_name) 612 while os.path.isfile(dest_file): 613 dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter)) 614 counter += 1 615 try: 616 OpenOrCreate(dest_file).write(xmls) 617 except OSError, e: 618 print 'WARNING: Unable to save %s in %s: %s' % (uri, dest_file, e) 619 return xmls

620

621 -def OpenOrCreate (file_name, tag=None, preserve_contents=False):

622 """Return a file object used to write the given file. 623 624 Use the C{tag} keyword to preserve the contents of existing files 625 that are not supposed to be overwritten. 626 627 To get a writable file but leaving any existing contents in place, 628 set the C{preserve_contents} keyword to C{True}. Normally, existing file 629 contents are erased. 630 631 The returned file pointer is positioned at the end of the file. 632 633 @keyword tag: If not C{None} and the file already exists, absence 634 of the given value in the first 4096 bytes of the file causes an 635 C{IOError} to be raised with C{errno} set to C{EEXIST}. I.e., 636 only files with this value in the first 4KB will be returned for 637 writing. 638 639 @keyword preserve_contents: This value controls whether existing 640 contents of the file will be erased (C{False}, default) or left in 641 place (C{True}). 642 """ 643 (path, leaf) = os.path.split(file_name) 644 if path: 645 try: 646 os.makedirs(path) 647 except Exception, e: 648 if not (isinstance(e, (OSError, IOError)) and (errno.EEXIST == e.errno)): 649 raise 650 fp = file(file_name, 'ab+') 651 if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size): 652 text = fp.read(4096) 653 if 0 > text.find(tag): 654 raise OSError(errno.EEXIST, os.strerror(errno.EEXIST)) 655 if not preserve_contents: 656 fp.seek(0) # os.SEEK_SET 657 fp.truncate() 658 else: 659 fp.seek(2) # os.SEEK_END 660 return fp

# Select the SHA-1 constructor used by HashForText.  hashlib didn't
# show up until 2.5, and sha is deprecated in 2.6, so hashlib is tried
# first and sha is only the fallback.
try:
    import hashlib
except ImportError:
    import sha
    __Hasher = sha.new
else:
    __Hasher = hashlib.sha1

def HashForText (text):
    """Return the hexadecimal digest of a cryptographic hash of C{text}.

    Unicode text is encoded as UTF-8 before it is hashed.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    data = text
    if isinstance(data, unicode):
        data = data.encode('utf-8')
    digest = __Hasher(data)
    return digest.hexdigest()

# Record whether the uuid module is available for _NewUUIDString.
# uuid didn't show up until 2.5; without it, the modules needed to
# fabricate an identifier are loaded instead.
try:
    import uuid
except ImportError:
    import time
    import random
    __HaveUUID = False
else:
    __HaveUUID = True

def _NewUUIDString ():
    """Return a new identifier string using the best available method.

    When the C{uuid} module is available the result is the URN form
    of a fresh C{uuid1} value.  On a version of python that does not
    incorporate C{uuid}, the result combines the current date and time
    (to the second) with a random number.

    @rtype: C{str}
    """
    if not __HaveUUID:
        stamp = time.strftime('%Y%m%d%H%M%S')
        salt = random.randint(0, 0xFFFFFFFF)
        return '%s:%08.8x' % (stamp, salt)
    return uuid.uuid1().urn

703

class UniqueIdentifier (object):
    """Records a unique identifier, generally associated with a
    binding generation action.

    The identifier is a string, but gets wrapped in an instance of
    this class to optimize comparisons and reduce memory footprint.

    Invoking the constructor for this class on the same string
    multiple times will return the same Python object, including the
    objects that have been associated with it.

    An instance of this class compares equal to, and hashes equivalent
    to, the uid string.  When C{str}'d, the result is the uid; when
    C{repr}'d, the result is a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier}.
    """

    # A map from UID string to the instance that represents it
    __ExistingUIDs = {}

    # Per-instance values; both are assigned in __new__.
    __uid = None
    __associatedObjects = None

    def uid (self):
        """The string unique identifier"""
        return self.__uid

    # Support pickling, which is done using only the UID.
    def __getnewargs__ (self):
        return (self.__uid,)

    def __getstate__ (self):
        return self.__uid

    def __setstate__ (self, state):
        # __new__ has already located or created the instance for
        # this uid; there is nothing left to restore.
        assert self.__uid == state

    # Singleton-like
    def __new__ (cls, *args):
        """Return the instance that represents the requested uid.

        With no argument a new uid string is generated.  Otherwise
        the first argument is the uid, given either as a string or as
        a L{UniqueIdentifier} instance.

        @raise TypeError: if the uid is neither of those."""
        if 0 == len(args):
            uid = _NewUUIDString()
        else:
            uid = args[0]
        if isinstance(uid, UniqueIdentifier):
            uid = uid.uid()
        if not isinstance(uid, basestring):
            raise TypeError('UniqueIdentifier uid must be a string')
        rv = cls.__ExistingUIDs.get(uid)
        if rv is None:
            rv = super(UniqueIdentifier, cls).__new__(cls)
            rv.__uid = uid
            # Created here rather than in __init__: __init__ runs on
            # every constructor call, even when an existing instance
            # is returned, and is not run at all when unpickling.
            rv.__associatedObjects = set()
            cls.__ExistingUIDs[uid] = rv
        return rv

    def associateObject (self, obj):
        """Associate the given object with this identifier.

        This is a one-way association: the object is not provided with
        a return path to this identifier instance."""
        self.__associatedObjects.add(obj)

    def associatedObjects (self):
        """The set of objects that have been associated with this
        identifier instance."""
        return self.__associatedObjects

    def __init__ (self, uid=None):
        """Create a new UniqueIdentifier instance.

        @param uid: The unique identifier string.  If present, it is
        the callers responsibility to ensure the value is universally
        unique.  If C{None}, one will be provided.
        @type uid: C{str} or C{unicode}
        """
        assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
        # Existing associations must survive a repeated constructor
        # call, so only fill in a set that is missing.
        if self.__associatedObjects is None:
            self.__associatedObjects = set()

    def __eq__ (self, other):
        """Compare by uid with another instance or with a string.

        @raise TypeError: if C{other} is neither C{None}, an instance
        of this class, nor a string."""
        if other is None:
            return False
        elif isinstance(other, UniqueIdentifier):
            other_uid = other.uid()
        elif isinstance(other, basestring):
            other_uid = other
        else:
            raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
        return self.uid() == other_uid

    def __hash__ (self):
        return hash(self.uid())

    def __str__ (self):
        return self.uid()

    def __repr__ (self):
        return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)

797 798 import datetime 799 import calendar 800 import time

class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    def __init__ (self, spec=None, flip=False):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta} giving the offset
        east of UTC (negative for zones west of UTC), the value
        C{None} (equal to 0 minutes east), or a string that conforms
        to the ISO8601 time zone sequence (B{Z}, or B{[+-]HH:MM}).

        @param flip: If C{False} (default), no adaptation is done.  If
        C{True}, the time zone offset is negated, resulting in the
        conversion from localtime to UTC rather than the default of
        UTC to localtime.

        @raise ValueError: if a string C{spec} is not a valid time
        zone sequence.

        @raise TypeError: if C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, datetime.timedelta):
                # A negative timedelta is stored as negative days plus
                # non-negative seconds, so both fields are needed to
                # recover the signed offset.
                self.__utcOffset_min = (spec.days * 86400 + spec.seconds) // 60
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, basestring):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        if flip:
            self.__utcOffset_min = - self.__utcOffset_min
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone: C{Z} for a zero offset,
        otherwise the sign followed by C{HHMM}."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

872

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass for the local time zone.

    The offsets are taken from the C{time} module when the class is
    defined; whether daylight saving time applies to a particular
    moment is determined through C{time.localtime}.

    Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
    """

    __ZeroDelta = datetime.timedelta(0)
    # Offset from UTC when daylight saving time is not in effect
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Offset from UTC when daylight saving time is in effect; the same
    # as the standard offset if the zone has no daylight saving time
    if time.daylight:
        __DSTOffset = datetime.timedelta(seconds=-time.altzone)
    else:
        __DSTOffset = __STDOffset
    # The adjustment that daylight saving time makes
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the offset from UTC in effect at C{dt}."""
        if not self.__isDST(dt):
            return self.__STDOffset
        return self.__DSTOffset

    def dst (self, dt):
        """Return the daylight saving adjustment in effect at C{dt}."""
        if not self.__isDST(dt):
            return self.__ZeroDelta
        return self.__DSTDelta

    def tzname (self, dt):
        """Return the name of the local zone in effect at C{dt}."""
        in_dst = self.__isDST(dt)
        return time.tzname[in_dst]

    def __isDST (self, dt):
        """Return C{True} iff daylight saving time is in effect at C{dt}."""
        # The final -1 leaves it to mktime to decide whether daylight
        # saving time applies.
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        local = time.localtime(time.mktime(fields))
        return 0 < local.tm_isdst

905

class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    This class defines a C{__getstate__} method which returns a copy
    of C{self.__dict__} with certain members removed.  Specifically,
    if a string "s" appears in a class member variable named
    C{__PrivateTransient} defined in the "Class" class, then the
    corresponding private variable "_Class__s" will be removed from
    the state dictionary.  This is used to eliminate unnecessary
    fields from instances placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without having
    to implement a C{__getstate__} method in every class in the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    If you use this, it is your responsibility to define the
    C{__PrivateTransient} class variable and add to it the required
    variable names.

    Classes that inherit from this are free to define their own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  If you do this, be sure that the class defining
    C{__getstate__} lists L{PrivateTransient_mixin} as one of its
    direct superclasses, lest the latter end up earlier in the mro and
    consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the
        transient members that currently hold a value other than
        C{None}."""
        klass = self.__class__
        suffix = self.__Attribute
        # Note that the aggregate set is stored in a class variable
        # with a slightly different name than the class-level set.
        cache_name = '_%s%s_' % (klass.__name__, suffix)
        transient = getattr(klass, cache_name, None)
        if transient is None:
            # First request for this class: gather the names listed by
            # every class in the mro, in their name-mangled form.
            transient = set()
            for ancestor in klass.mro():
                for (member, names) in ancestor.__dict__.items():
                    if not member.endswith(suffix):
                        continue
                    owner_prefix = member[:-len(suffix)]
                    for name in names:
                        transient.add('%s__%s' % (owner_prefix, name))
            setattr(klass, cache_name, transient)
        state = self.__dict__.copy()
        for key in transient:
            if state.get(key) is not None:
                del state[key]
        return state

970

def GetMatchingFiles(path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile(r'.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    Returned paths are built from the path entries as given; they are
    not normalized.  An entry that names a regular file is tested
    against C{pattern} using its whole path, whereas files found in a
    directory are tested using their bare file name.

    @param path: A colon separated list of directories in which the
    search should be performed.  If a path entry ends with C{//}, any
    directory beneath it is scanned as well, recursively.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.  Only the first occurrence is expanded;
    later occurrences are skipped.

    @keyword default_path: A system-defined directory (or colon
    separated list of them) which can be restored to the path by
    placing the C{default_path_wildcard} in the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @rtype: C{list} of C{str}
    """

    def _accepted(name):
        # With no pattern everything is wanted.
        return (pattern is None) or (pattern.search(name) is not None)

    found = []
    pending = path.split(':')
    while pending:
        entry = pending.pop(0)
        if default_path_wildcard == entry:
            if default_path is not None:
                # Splice the defaults in where the wildcard stood, and
                # forget them so a repeated wildcard adds nothing.
                pending = default_path.split(':') + pending
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if _accepted(entry):
                found.append(entry)
            continue
        for (root, _dirs, names) in os.walk(entry):
            found.extend(os.path.join(root, _n) for _n in names if _accepted(_n))
            if not recursive:
                # Only the top directory itself was requested.
                break
    return found

1040

class _LocationBase(object):
    """Wrap a location.

    The wrapped value is probably a string, but might be a uri object
    or the like.  The wrapper exists as a separate object only so that
    many locations can share one base instead of each holding its own
    copy of the same string."""

    __locationBase = None

    def __init__(self, location_base):
        """Record the wrapped value.

        @param location_base: The value to wrap.  If it is itself a
        L{_LocationBase}, the value it wraps is taken instead, so
        wrappers never nest.
        """
        self.__locationBase = (location_base.locationBase()
                               if isinstance(location_base, _LocationBase)
                               else location_base)

    def locationBase(self):
        """Return the wrapped value."""
        return self.__locationBase

    def __str__(self):
        """Return C{str} of the wrapped value."""
        return str(self.__locationBase)

1059

class Location(object):
    """A line and column position relative to a shared location base.

    The base is always held as a L{_LocationBase} wrapper, so that
    locations derived from one another with L{newLocation} share a
    single base object.
    """

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__(self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @keyword location_base: The base of the location; anything
        that is not already a L{_LocationBase} (including C{None}) is
        wrapped in one.
        @keyword line_number: The line within the base, if known.
        @keyword column_number: The column within the line, if known.
        """
        if not isinstance(location_base, _LocationBase):
            location_base = _LocationBase(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation(self, locator=None, line_number=None, column_number=None):
        """Create a location that shares this one's base.

        @keyword locator: Optional object providing C{getLineNumber}
        and C{getColumnNumber} methods; when given, the values it
        reports replace C{line_number} and C{column_number}.
        @keyword line_number: Line to use when no locator supplies one.
        @keyword column_number: Column to use when no locator supplies
        one.
        @rtype: L{Location}
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: a locator that cannot report a position
                # leaves whatever has been determined so far.  Only
                # ordinary errors are suppressed, so KeyboardInterrupt
                # and SystemExit still propagate.
                pass
        return Location(self.__locationBase, line_number, column_number)

    # Read-only views of the stored values; locationBase yields the
    # L{_LocationBase} wrapper, not the value it wraps.
    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __str__(self):
        """Return C{base[line:column]}, or C{<unknownLocation>} when no
        base is known."""
        base = self.__locationBase
        # The constructor always stores a wrapper, so "unknown" has to
        # be detected by looking at the wrapped value as well.
        if (base is None) or (base.locationBase() is None):
            return '<unknownLocation>'
        return '%s[%s:%s]' % (self.locationBase, self.lineNumber, self.columnNumber)

1089

class Locatable_mixin(pyxb.cscRoot):
    """Mix-in giving an instance a place to remember a location.

    The location is taken from the C{location} keyword at construction
    time and can be replaced later through L{_setLocation}.
    """

    __location = None

    def __init__(self, *args, **kw):
        """Remove the C{location} keyword (default C{None}) and record
        it, then pass the remaining arguments up the constructor
        chain."""
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _location(self):
        """Return the recorded location, or C{None} if there is none."""
        return self.__location

    def _setLocation(self, location):
        """Replace the recorded location with C{location}."""
        self.__location = location

1102

Source Code for Module pyxb.utils.utility