pyxb.utils.utility

1 # Copyright 2009, Peter A. Bigot 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); you may 4 # not use this file except in compliance with the License. You may obtain a 5 # copy of the License at: 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 # License for the specific language governing permissions and limitations 13 # under the License. 14 15 """Utility functions and classes.""" 16 17 import re 18 import os 19 import errno 20 import pyxb 21

def QuotedEscaped (s):
    """Return Python source text for a literal that evaluates to C{s}.

    The built-in C{repr} already produces exactly that, so it is used
    directly instead of re-implementing quoting and escaping here.

    @param s: the value (normally a string) to render as a literal.

    @rtype: C{str}
    """
    literal = repr(s)
    return literal

# Characters that are mapped to an underscore: hyphen, space, period.
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Any character that is not an ASCII letter, digit or underscore.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# A run of underscores at the very start of the string.
_PrefixUnderscore_re = re.compile(r'^_+')
# A run of digits at the very start of the string.
_PrefixDigit_re = re.compile(r'^\d+')

def MakeIdentifier (s):
    """Convert a value into something suitable to be a Python identifier.

    The steps, applied in this order, are:

     1. C{s} is converted with C{str};
     2. hyphens, spaces and periods are replaced by underscores;
     3. every remaining character other than an ASCII letter, digit
        or underscore is dropped;
     4. all leading underscores are removed;
     5. if the result begins with a digit, the character 'n' is
        prepended;
     6. if the result is the empty string, 'emptyString' is used
        instead.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @param s: the value from which an identifier is derived.

    @rtype: C{str}
    """
    text = str(s)
    text = _UnderscoreSubstitute_re.sub('_', text)
    text = _NonIdentifier_re.sub('', text)
    text = _PrefixUnderscore_re.sub('', text)
    if _PrefixDigit_re.match(text):
        text = 'n' + text
    if not text:
        text = 'emptyString'
    return text

# Note that types like int and float are not keywords
_Keywords = frozenset( (
    "None", "and", "as", "assert", "break", "class", "continue", "def",
    "del", "elif", "else", "except", "exec", "finally", "for", "from",
    "global", "if", "import", "in", "is", "lambda", "not", "or",
    "pass", "print", "raise", "return", "try", "while", "with", "yield",
    ) )
"""The keywords reserved for Python."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Return C{s}, with an underscore appended if it is a reserved word.

    A string is reserved when it is one of the Python keywords listed
    in C{_Keywords} or is contained in C{aux_keywords}.  Any other
    string is returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional container of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    if (s not in _Keywords) and (s not in aux_keywords):
        return s
    return '%s_' % (s,)

83

def MakeUnique (s, in_use):
    """Return an identifier based on C{s} that is not in the given set.

    If C{s} is not yet in use it is returned as-is.  Otherwise any
    trailing underscores are removed and candidates are tried in the
    order C{x_}, C{x_2}, C{x_3}, ... until an unused one is found.

    @param s: the preferred identifier.

    @param in_use: The set of identifiers already in use in the
    relevant scope.  C{in_use} is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    unique = s
    if unique in in_use:
        stem = s.rstrip('_')
        unique = '%s_' % (stem,)
        serial = 1
        while unique in in_use:
            serial += 1
            unique = '%s_%d' % (stem, serial)
    in_use.add(unique)
    return unique

108

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    The value is passed through L{MakeIdentifier}, stripped of leading
    and trailing underscores, passed through L{DeconflictKeyword},
    given the requested visibility prefix, and finally made unique
    with L{MakeUnique}.

    @param s: the value from which the identifier is derived.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), no prefix is added for
    privacy.  If C{True}, the returned identifier has two leading
    underscores, making it a private variable within a Python class.
    Takes precedence over C{protected}.

    @keyword protected: as for C{private}, but uses only one
    underscore.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    identifier = MakeIdentifier(s).strip('_')
    identifier = DeconflictKeyword(identifier, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + identifier, in_use)

144 145 # @todo: descend from pyxb.cscRoot, if we import pyxb

class _DeconflictSymbols_mixin (object):
    """Mix-in that records the public symbols a class reserves, so that
    generated binding classes inheriting from it can avoid them.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  An XML schema might use tag or attribute
    names that match these symbols.  To avoid such a conflict, the
    reserved symbols marked in this class are added to the
    pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting sequences of one or more spaces
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Normalize the given string.

    Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
    parameters should be assigned the value C{True} by the caller.  If
    more than one is set, C{preserve} wins over C{replace}, which wins
    over C{collapse}.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: all tabs, newlines, and carriage returns are
     replaced with ASCII spaces.

     - C{collapse}: the C{replace} normalization is done, then
     each run of spaces is replaced by a single space and spaces at
     the start and end of the string are removed.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @param text: the string to be normalized.

    @raise Exception: if none of the three keywords is true.

    @rtype: C{str}
    """
    if preserve:
        return text
    text = __TabCRLF_re.sub(' ', text)
    if replace:
        return text
    if collapse:
        # Strip only ASCII spaces.  A bare strip() would also remove
        # characters such as form feed or no-break space, which the
        # whiteSpace facet does not treat as white space.
        return __MultiSpace_re.sub(' ', text).strip(' ')
    # pyxb not imported here; could be.
    raise Exception('NormalizeWhitespace: No normalization specified')

210

class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes must be hashable, since they are stored in sets and used as
    dictionary keys.

    @note: Computed results (roots, strongly-connected components,
    depth-first order) are cached.  Adding nodes or edges does not
    discard those caches; pass C{reset=True} to the relevant accessor
    to recompute.
    """

    # Class-level defaults for the cached results; None means "not yet
    # computed".
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: if not C{None}, a node that is recorded as the
        sole initial root.  It is not added to the node set by this
        call.
        """
        self.__roots = None
        if root is not None:
            self.__roots = set([root])
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop is not recorded as an incoming edge, so a node
        # whose only incoming edge is from itself still counts as a
        # root.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any roots that had been manually added
        using L{addRoot} (or passed to the constructor) will no longer
        be in the set unless they have no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The walk starts from the nodes returned by L{roots}; nodes
        that cannot be reached from any root are not visited.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged.

        @raise Exception: if the graph has nodes but no roots.
        """

        if (self.__scc is not None) and (not reset):
            return
        # Validate before touching any cached state, so that a failed
        # run is not mistaken for a completed one by later calls.
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        self.__sccMap = { }
        self.__stack = []
        # Mirrors the contents of __stack to give constant-time
        # membership tests.
        self.__onStack = set()
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        for r in roots:
            self._tarjan(r)

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        self.__onStack.add(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Not yet visited: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__onStack:
                # Back edge into the component currently being built.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            # Otherwise the target belongs to a component that has
            # already been completed; nothing to do.

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the first-visited node of its component: everything
            # above it on the stack, and v itself, forms the component.
            scc = []
            while True:
                member = self.__stack.pop()
                self.__onStack.discard(member)
                scc.append(member)
                if v == member:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph that
        contain more than one node.

        The data structure is a list, each element of which is itself
        a list containing two or more nodes from the graph.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes), each
        represented as a list of nodes.  Appearance of a node in a
        component earlier in the list indicates that it has no
        dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node, None)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        Computed as edges - nodes + 2 * (number of multi-node
        strongly-connected components)."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        """Post-order depth-first walk from C{source}, appending each
        node to the order once all its targets have been handled."""
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a description of the graph in DOT syntax.

        @keyword title: the name given to the digraph.

        @keyword labeller: optional callable that maps a node to its
        label text; C{str} is used if this is C{None}.

        @note: Self-loops are omitted, and neither the title nor the
        labels are escaped.

        @rtype: C{str}
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and recompute the order.

        @raise Exception: if the walk from the roots does not reach
        every node in the graph."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

# Module-wide table of URI prefix rewrites: maps a prefix to the text
# that should replace it.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Set the map that is used by L{NormalizeLocation} to rewrite URI prefixes.

    The module-level map object is emptied and refilled in place, so
    existing references to it observe the new contents.

    @param prefix_map: the new prefix-to-replacement entries, in any
    form accepted by C{dict.update}.
    """

    rewrite_map = LocationPrefixRewriteMap_
    rewrite_map.clear()
    rewrite_map.update(prefix_map)

496

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    If no URI scheme is present (i.e., the result contains no colon),
    this will normalize a file system path using C{os.path.realpath}.

    Optionally, the resulting absolute URI can subsequently be
    rewritten to replace specified prefix strings with alternative
    strings, e.g. to convert a remote URI to a local repository.  This
    rewriting is done after the conversion to an absolute URI, but
    before normalizing file system URIs.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    @return: the normalized URI or file system path.
    """
    # The URL utilities moved between Python 2 and Python 3.
    try:
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import urljoin

    if uri is None:
        return uri
    if parent_uri is None:
        abs_uri = uri
    else:
        abs_uri = urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    # NOTE(review): every entry is tested against the progressively
    # rewritten URI, so with overlapping prefixes the result depends on
    # the map's iteration order -- confirm whether that is intended.
    for (pfx, sub) in prefix_map.items():
        if abs_uri.startswith(pfx):
            abs_uri = sub + abs_uri[len(pfx):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

540 541 import urlparse 542

def TextFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as a text string.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.  The test for a
    scheme is simply whether the uri contains a colon.

    @param uri: the location to read.

    @keyword archive_directory: if given, and the uri was retrieved
    through a URL (not a local file), a copy of the retrieved content
    is saved in this directory under the base name of the uri's path.
    A numeric suffix is added if a file of that name already exists.
    Failure to save the copy produces a warning but is otherwise
    ignored.

    @return: the retrieved content.

    @raise Exception: whatever the underlying open or read raised;
    a diagnostic is printed before it is re-raised.
    """
    # The URL utilities moved between Python 2 and Python 3.
    try:
        from urllib2 import urlopen
        from urlparse import urlparse
    except ImportError:
        from urllib.request import urlopen
        from urllib.parse import urlparse
    xmls = None
    try:
        if 0 <= uri.find(':'):
            stream = urlopen(uri)
        else:
            stream = open(uri)
            # Local files are never archived.
            archive_directory = None
        try:
            xmls = stream.read()
        finally:
            stream.close()
    except Exception as e:
        print('TextFromURI: open %s caught: %s' % (uri, e))
        raise
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        # Archiving is best-effort: report problems but still return
        # the content that was retrieved.
        try:
            archive = OpenOrCreate(dest_file)
            try:
                archive.write(xmls)
            finally:
                archive.close()
        except (OSError, IOError) as e:
            print('WARNING: Unable to save %s in %s: %s' % (uri, dest_file, e))
    return xmls

571

def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write the given file.

    Any missing directories in the path to the file are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path to the file to be opened.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file causes an C{OSError} to be raised with C{errno} set to
    C{EEXIST}.  I.e., only files with this value in the first 4KB will
    be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: if the C{tag} check fails, or a required
    directory cannot be created.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is exactly what we want.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'a+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            # The initial position of an append-mode stream is not the
            # same everywhere; read the tag from the real start.
            fp.seek(0)
            text = fp.read(4096)
            if 0 > text.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0) # os.SEEK_SET
            fp.truncate()
        else:
            fp.seek(0, 2) # os.SEEK_END
    except Exception:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText (text):
    """Calculate a cryptographic hash (SHA-1) of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.

    @param text: the data to hash.  A unicode string is encoded as
    UTF-8 first, so that text containing non-ASCII characters can be
    hashed; byte strings are hashed as they are.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()

# uuid didn't show up until 2.5
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import time
    import random

def _NewUUIDString ():
    """Obtain a UUID using the best available method.  On a version of
    python that does not incorporate the C{uuid} class, this creates a
    string combining the current date and time (to the second) with a
    random number.

    When the C{uuid} module is available the result is the URN form
    of a version 1 UUID.

    @rtype: C{str}
    """
    if __HaveUUID:
        return uuid.uuid1().urn
    # No 'L' suffix on the literal: it is unnecessary on Python 2 and a
    # syntax error on Python 3.
    return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))

652

class UniqueIdentifier (object):
    """Records a unique identifier, generally associated with a
    binding generation action.

    The identifier is a string, but gets wrapped in an instance of
    this class to optimize comparisons and reduce memory footprint.

    Invoking the constructor for this class on the same string
    multiple times will return the same Python object, including any
    objects that have been associated with it.

    An instance of this class compares equal to, and hashes equivalent
    to, the uid string.  When C{str}'d, the result is the uid; when
    C{repr}'d, the result is a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier}.
    """

    # A map from UID string to the instance that represents it
    __ExistingUIDs = {}

    # The types accepted as a uid string.  On Python 2 this is
    # (str, unicode), i.e. everything covered by basestring.
    __StringTypes = (str, type(u''))

    __uid = None
    __associatedObjects = None

    def uid (self):
        """The string unique identifier"""
        return self.__uid

    # Support pickling, which is done using only the UID.
    def __getnewargs__ (self):
        return (self.__uid,)

    def __getstate__ (self):
        return self.__uid

    def __setstate__ (self, state):
        assert self.__uid == state

    # Singleton-like
    def __new__ (cls, *args):
        """Return the instance for the given uid, creating it if needed.

        With no arguments a fresh uid is generated.  Otherwise the
        first argument is the uid, as a string or as another
        L{UniqueIdentifier}.

        @raise TypeError: if the uid is not a string.
        """
        if 0 == len(args):
            uid = _NewUUIDString()
        else:
            uid = args[0]
        if isinstance(uid, UniqueIdentifier):
            uid = uid.uid()
        if not isinstance(uid, cls.__StringTypes):
            raise TypeError('UniqueIdentifier uid must be a string')
        rv = cls.__ExistingUIDs.get(uid)
        if rv is None:
            rv = super(UniqueIdentifier, cls).__new__(cls)
            rv.__uid = uid
            # Created here rather than in __init__: __init__ runs on
            # every constructor call, including those that return an
            # existing instance, and is skipped when unpickling.
            rv.__associatedObjects = set()
            cls.__ExistingUIDs[uid] = rv
        return rv

    def associateObject (self, obj):
        """Associate the given object with this identifier.

        This is a one-way association: the object is not provided with
        a return path to this identifier instance."""
        self.__associatedObjects.add(obj)

    def associatedObjects (self):
        """The set of objects that have been associated with this
        identifier instance."""
        return self.__associatedObjects

    def __init__ (self, uid=None):
        """Create a new UniqueIdentifier instance.

        @param uid: The unique identifier string.  If present, it is
        the callers responsibility to ensure the value is universally
        unique.  If C{None}, one will be provided.
        @type uid: C{str} or C{unicode}
        """
        assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
        # Existing associations must survive repeated construction.
        if self.__associatedObjects is None:
            self.__associatedObjects = set()

    def __eq__ (self, other):
        """Compare by uid against another instance or a string.

        @return: C{False} if C{other} is C{None}.

        @raise TypeError: if C{other} is neither C{None}, a
        L{UniqueIdentifier}, nor a string.
        """
        if other is None:
            return False
        elif isinstance(other, UniqueIdentifier):
            other_uid = other.uid()
        elif isinstance(other, self.__StringTypes):
            other_uid = other
        else:
            raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
        return self.uid() == other_uid

    def __ne__ (self, other):
        """Inverse of L{__eq__}; Python 2 does not derive this
        automatically."""
        return not self.__eq__(other)

    def __hash__ (self):
        return hash(self.uid())

    def __str__ (self):
        return self.uid()

    def __repr__ (self):
        return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)

746 747 import datetime 748 import calendar 749 import time

class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The types treated as a lexical time zone.  On Python 2 this is
    # (str, unicode), i.e. everything covered by basestring.
    __StringTypes = (str, type(u''))

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    def __init__ (self, spec=None, flip=False):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta} (truncated to
        whole minutes), the value C{None} (equal to 0 minutes east),
        or a string that conform to the ISO8601 time zone sequence
        (B{Z}, or B{[+-]HH:MM}).

        @param flip: If C{False} (default), no adaptation is done.  If
        C{True}, the time zone offset is negated, resulting in the
        conversion from localtime to UTC rather than the default of
        UTC to localtime.

        @raise ValueError: if C{spec} is a string that is not a valid
        time zone sequence.

        @raise TypeError: if C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, self.__StringTypes):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, datetime.timedelta):
                # A negative timedelta is stored as negative days plus
                # a non-negative number of seconds, so the days must be
                # taken into account.
                self.__utcOffset_min = spec.days * 24 * 60 + spec.seconds // 60
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        if flip:
            self.__utcOffset_min = - self.__utcOffset_min
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        # The name uses the same [+-]HH:MM form that the constructor
        # accepts, so it can be fed back in as a spec.
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in ISO8601 format
        (B{Z}, or B{[+-]HH:MM})."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

821

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass for the local time zone.

    Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.

    The offsets are read from the C{time} module when the class is
    defined.
    """

    # Offset from UTC when daylight saving time is not in effect.
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    # Offset from UTC when daylight saving time is in effect; same as
    # the standard offset if the local zone defines no DST.
    if time.daylight:
        __DSTOffset = datetime.timedelta(seconds=-time.altzone)
    else:
        __DSTOffset = __STDOffset
    # The adjustment that DST applies to the standard offset.
    __DSTDelta = __DSTOffset - __STDOffset
    __ZeroDelta = datetime.timedelta(0)

    def __isDST (self, dt):
        """Tell whether the platform considers DST to be in effect at
        the local wall-clock time C{dt}."""
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        local = time.localtime(time.mktime(fields))
        return 0 < local.tm_isdst

    def utcoffset (self, dt):
        """Return the offset from UTC in effect at C{dt}."""
        return (self.__STDOffset, self.__DSTOffset)[self.__isDST(dt)]

    def dst (self, dt):
        """Return the DST adjustment in effect at C{dt}, which is zero
        when DST is not in effect."""
        return (self.__ZeroDelta, self.__DSTDelta)[self.__isDST(dt)]

    def tzname (self, dt):
        """Return the platform's name for the zone in effect at C{dt}."""
        return time.tzname[self.__isDST(dt)]

854

class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    This class defines a C{__getstate__} method which returns a copy
    of C{self.__dict__} with certain members removed.  Specifically,
    if a string "s" appears in a class member variable named
    C{__PrivateTransient} defined in the "Class" class, then the
    corresponding private variable "_Class__s" will be removed from
    the state dictionary.  This is used to eliminate unnecessary
    fields from instances placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without having
    to implement a C{__getstate__} method in every class in the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    If you use this, it is your responsibility to define the
    C{__PrivateTransient} class variable and add to it the required
    variable names.

    Classes that inherit from this are free to define their own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  If you do this, be sure that the class defining
    C{__getstate__} lists L{PrivateTransient_mixin} as one of its
    direct superclasses, lest the latter end up earlier in the mro and
    consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the
        members that the class hierarchy has marked as transient.

        The set of names to drop is collected from every class in the
        mro on first use and then cached on the instance's class.
        Members whose value is C{None} are left in the state.
        """
        state = self.__dict__.copy()
        owner = self.__class__
        suffix = self.__Attribute
        # Note that the aggregate set is stored in a class variable
        # with a slightly different name than the class-level set.
        cache_name = '_%s%s_' % (owner.__name__, suffix)
        skipped = getattr(owner, cache_name, None)
        if skipped is None:
            skipped = set()
            for cl in owner.mro():
                for (k, v) in cl.__dict__.items():
                    if not k.endswith(suffix):
                        continue
                    # k is the mangled name "_Class__PrivateTransient";
                    # what precedes the suffix is the "_Class" prefix
                    # shared by that class's private members.
                    mangled_prefix = k[:-len(suffix)]
                    for _n in v:
                        skipped.add('%s__%s' % (mangled_prefix, _n))
            setattr(owner, cache_name, skipped)
        for k in skipped:
            if state.get(k) is not None:
                del state[k]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidently leaving a
        # reference to one in an instance private member.
        #for (k, v) in state.items():
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.resolution.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state

919

def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile('.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: A colon separated list of directories in which the
    search should be performed.  If a path entry ends with C{//}, any
    directory beneath it is scanned as well, recursively.  An entry
    that names a file rather than a directory is itself a candidate.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.  The
    pattern is applied with C{search} to the file name alone for
    files found in a directory, and to the whole entry for an entry
    that names a file.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.  It may itself be a colon separated list.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: the matching paths, in the order in which they were found.
    @rtype: C{list}
    """
    def accepts (name):
        # With no pattern everything is accepted.
        return (pattern is None) or (pattern.search(name) is not None)

    matching_files = []
    pending = path.split(':')
    while pending:
        entry = pending.pop(0)
        if entry == default_path_wildcard:
            # Splice the default path in where the wildcard stood.
            if default_path is not None:
                pending[0:0] = default_path.split(':')
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if accepts(entry):
                matching_files.append(entry)
            continue
        for (root, dirs, files) in os.walk(entry):
            matching_files.extend([ os.path.join(root, f) for f in files if accepts(f) ])
            # The first result of the walk is the directory itself;
            # go deeper only if asked to.
            if not recursive:
                break
    return matching_files

988

class _LocationBase (object):
    """Wrap the base part of a location.

    The wrapped value is probably a string, but might be a uri object
    or the like.  The wrapper exists so that many L{Location}
    instances can share a single reference instead of each holding its
    own copy of the same string.
    """

    # Wrapped value; None when the base location is unknown.
    __locationBase = None

    def locationBase (self):
        """Return the wrapped location value (C{None} if unknown)."""
        return self.__locationBase

    def __init__ (self, location_base):
        """Create the wrapper.

        @param location_base: The value to wrap.  If this is itself a
        L{_LocationBase}, its underlying value is used so that
        wrappers never nest.
        """
        if isinstance(location_base, _LocationBase):
            location_base = location_base.locationBase()
        self.__locationBase = location_base

    def __str__ (self):
        return str(self.__locationBase)


class Location (object):
    """A position (line and column) relative to a shared base location."""

    # Instance of _LocationBase shared with related locations.
    __locationBase = None
    # Line and column as supplied by the creator; not validated here.
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @keyword location_base: Either a L{_LocationBase}, which is
        used as is (and therefore shared), or any other value, which
        is wrapped in a new L{_LocationBase}.
        @keyword line_number: Line associated with the location.
        @keyword column_number: Column associated with the location.
        """
        if not isinstance(location_base, _LocationBase):
            location_base = _LocationBase(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create a new L{Location} sharing this instance's base.

        @keyword locator: Optional object providing C{getLineNumber()}
        and C{getColumnNumber()} (presumably a SAX-style locator).
        When given, values it reports take precedence over the
        C{line_number} and C{column_number} arguments.
        @keyword line_number: Line to use when no locator value is available.
        @keyword column_number: Column to use when no locator value is available.
        @rtype: L{Location}
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: an object that cannot report a position
                # leaves the explicitly supplied values in effect.
                # Deliberately not a bare except, so KeyboardInterrupt
                # and SystemExit still propagate.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __str__ (self):
        base = self.locationBase
        # The base is normally a _LocationBase wrapper, so the wrapped
        # value must be inspected to tell whether the location is known.
        if (base is None) or (base.locationBase() is None):
            return '<unknownLocation>'
        return '%s[%s:%s]' % (base, self.lineNumber, self.columnNumber)

1037

class Locatable_mixin (pyxb.cscRoot):
    """Mix-in that lets an instance remember a location.

    The location is taken from the optional C{location} keyword at
    construction time and can be replaced later.  It is presumably a
    L{Location} instance, but nothing in this class enforces that.
    """

    # Location recorded for the instance; None until one is supplied.
    __location = None

    def __init__ (self, *args, **kw):
        """Record the C{location} keyword, if any, and continue construction.

        The keyword is removed from C{kw} before the remaining
        arguments are passed along the constructor chain.
        """
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the recorded location with C{location}."""
        self.__location = location

    def _location (self):
        """Return the recorded location, or C{None} if none was set."""
        return self.__location

1050

Source Code for Module pyxb.utils.utility