1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Utility functions and classes."""
17
18 import re
19 import os
20 import errno
21 import pyxb
22 import urlparse
23 import time
24 import datetime
25 import logging
26
27 _log = logging.getLogger(__name__)
30 """Class decorator that fills in missing ordering methods.
31
32 Concept derived from Python 2.7.5 functools.total_ordering,
33 but this version requires that __eq__ and __lt__ be provided,
34 and unconditionally overrides __ne__, __gt__, __le__, and __ge__
35 with the derived versions.
36
37 This is still necessary in Python 3 because in Python 3 the
38 comparison x >= y is done by the __ge__ inherited from object,
39 which does not handle the case where x and y are not the same type
40 even if the underlying y < x would convert x to be compatible. """
41
42 def applyconvert (cls, derived):
43 for (opn, opx) in derived:
44 opx.__name__ = opn
45 opx.__doc__ = getattr(int, opn).__doc__
46 setattr(cls, opn, opx)
47
48 applyconvert(cls, (
49 ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))),
50 ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)),
51 ('__ge__', lambda self, other: not self.__lt__(other))
52 ))
53 applyconvert(cls, (
54 ('__ne__', lambda self, other: not self.__eq__(other)),
55 ))
56 return cls
57
59 """Tuple comparison that permits C{None} as lower than any value,
60 and defines other cross-type comparison.
61
62 @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
63 li = iter(lhs)
64 ri = iter(rhs)
65 while True:
66 try:
67 (lv, rv) = (li.next(), ri.next())
68 if lv is None:
69 if rv is None:
70 continue
71 return -1
72 if rv is None:
73 return 1
74 if lv == rv:
75 continue
76 if lv < rv:
77 return -1
78 return 1
79 except StopIteration:
80 nl = len(lhs)
81 nr = len(rhs)
82 if nl < nr:
83 return -1
84 if nl == nr:
85 return 0
86 return 1
87
89 """Convert a string into a literal value that can be used in Python source.
90
91 This just calls C{repr}. No point in getting all complex when the language
92 already gives us what we need.
93
94 @rtype: C{str}
95 """
96 return repr(s)
97
99 """Default implementation for _XMLIdentifierToPython
100
101 For historical reasons, this converts the identifier from a str to
102 unicode in the system default encoding. This should have no
103 practical effect.
104
105 @param identifier : some XML identifier
106
107 @return: C{unicode(identifier)}
108 """
109
110 return unicode(identifier)
111
113 """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier.
114
115 In Python3, identifiers can be full Unicode tokens, but in Python2,
116 all identifiers must be ASCII characters. L{MakeIdentifier} enforces
117 this by removing all characters that are not valid within an
118 identifier.
119
120 In some cases, an application generating bindings may be able to
121 transliterate Unicode code points that are not valid Python identifier
122 characters into something else. This callable can be assigned to
123 perform that translation before the invalid characters are
124 stripped.
125
126 It is not the responsibility of this callable to do anything other
127 than replace whatever characters it wishes to. All
128 transformations performed by L{MakeIdentifier} will still be
129 applied, to ensure the output is in fact a legal identifier.
130
131 @param xml_identifier_to_python : A callable that takes a string
132 and returns a Unicode, possibly with non-identifier characters
133 replaced by other characters. Pass C{None} to reset to the
134 default implementation, which is L{_DefaultXMLIdentifierToPython}.
135
136 @rtype: C{unicode}
137 """
138 global _XMLIdentifierToPython
139 if xml_identifier_to_python is None:
140 xml_identifier_to_python = _DefaultXMLIdentifierToPython
141 _XMLIdentifierToPython = xml_identifier_to_python
142
143 _XMLIdentifierToPython = _DefaultXMLIdentifierToPython
144
145 _UnderscoreSubstitute_re = re.compile(r'[- .]')
146 _NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
147 _PrefixUnderscore_re = re.compile(r'^_+')
148 _PrefixDigit_re = re.compile(r'^\d+')
149 _CamelCase_re = re.compile(r'_\w')
152 """Convert a string into something suitable to be a Python identifier.
153
154 The string is processed by L{_XMLIdentifierToPython}. Following
155 this, dashes, spaces, and periods are replaced by underscores, and
156 characters not permitted in Python identifiers are stripped.
157 Furthermore, any leading underscores are removed. If the result
158 begins with a digit, the character 'n' is prepended. If the
159 result is the empty string, the string 'emptyString' is
160 substituted.
161
162 No check is made for L{conflicts with keywords <DeconflictKeyword>}.
163
164 @keyword camel_case : If C{True}, any underscore in the result
165 string that is immediately followed by an alphanumeric is replaced
166 by the capitalized version of that alphanumeric. Thus,
167 'one_or_two' becomes 'oneOrTwo'. If C{False} (default), has no
168 effect.
169
170 @rtype: C{str}
171 """
172 s = _XMLIdentifierToPython(s)
173 s = _PrefixUnderscore_re.sub('', _NonIdentifier_re.sub('', _UnderscoreSubstitute_re.sub('_', s)))
174 if camel_case:
175 s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s)
176 if _PrefixDigit_re.match(s):
177 s = 'n' + s
178 if 0 == len(s):
179 s = 'emptyString'
180 return s
181
182 _PythonKeywords = frozenset( (
183 "and", "as", "assert", "break", "class", "continue", "def", "del",
184 "elif", "else", "except", "exec", "finally", "for", "from", "global",
185 "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
186 "raise", "return", "try", "while", "with", "yield"
187 ) )
188 """Python keywords. Note that types like int and float are not
189 keywords.
190
191 @see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""
192
193 _PythonBuiltInConstants = frozenset( (
194 "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
195
196
197
198 "set"
199 ) )
200 """Other symbols that aren't keywords but that can't be used.
201
202 @see: U{http://docs.python.org/library/constants.html}."""
203
204 _Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
205 """The keywords reserved for Python, derived from L{_PythonKeywords}
206 and L{_PythonBuiltInConstants}."""
209 """If the provided string C{s} matches a Python language keyword,
210 append an underscore to distinguish them.
211
212 See also L{MakeUnique}.
213
214 @param s: string to be deconflicted
215
216 @keyword aux_keywords: optional iterable of additional strings
217 that should be treated as keywords.
218
219 @rtype: C{str}
220
221 """
222 if (s in _Keywords) or (s in aux_keywords):
223 return '%s_' % (s,)
224 return s
225
227 """Return an identifier based on C{s} that is not in the given set.
228
229 The returned identifier is made unique by appending an underscore
230 and, if necessary, a serial number.
231
232 The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...
233
234 @param in_use: The set of identifiers already in use in the
235 relevant scope. C{in_use} is updated to contain the returned
236 identifier.
237
238 @rtype: C{str}
239 """
240 if s in in_use:
241 ctr = 2
242 s = s.rstrip('_')
243 candidate = '%s_' % (s,)
244 while candidate in in_use:
245 candidate = '%s_%d' % (s, ctr)
246 ctr += 1
247 s = candidate
248 in_use.add(s)
249 return s
250
def PrepareIdentifier(s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    The input is converted with L{MakeIdentifier}, leading and trailing
    underscores are stripped, keyword conflicts are resolved with
    L{DeconflictKeyword}, the requested visibility prefix is applied,
    and finally L{MakeUnique} picks a name not already in C{in_use}.

    @param s: the string from which the identifier is derived.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is true.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    candidate = DeconflictKeyword(MakeIdentifier(s).strip('_'), aux_keywords)
    # The visibility prefix is applied after keyword deconfliction so the
    # keyword test sees the bare name.
    if private:
        candidate = '__' + candidate
    elif protected:
        candidate = '_' + candidate
    return MakeUnique(candidate, in_use)
286
289 """Mix-in used to deconflict public symbols in classes that may be
290 inherited by generated binding classes.
291
292 Some classes, like the L{pyxb.binding.basis.element} or
293 L{pyxb.binding.basis.simpleTypeDefinition} classes in
294 L{pyxb.binding.basis}, have public symbols associated with
295 functions and variables. It is possible that an XML schema might
296 include tags and attribute names that match these symbols. To
297 avoid conflict, the reserved symbols marked in this class are
298 added to the pre-defined identifier set.
299
300 Subclasses should create a class-level variable
301 C{_ReservedSymbols} that contains a set of strings denoting the
302 symbols reserved in this class, combined with those from any
303 superclasses that also have reserved symbols. Code like the
304 following is suggested::
305
306 # For base classes (direct mix-in):
307 _ReservedSymbols = set([ 'one', 'two' ])
308 # For subclasses:
309 _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))
310
311 Only public symbols (those with no underscores) are currently
312 supported. (Private symbols can't be deconflicted that easily,
313 and no protected symbols that derive from the XML are created by
314 the binding generator.)
315 """
316
317 _ReservedSymbols = set()
318 """There are no reserved symbols in the base class."""
319
320
321 __TabCRLF_re = re.compile("[\t\n\r]")
322
323 __MultiSpace_re = re.compile(" +")
326 """Normalize the given string.
327
328 Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
329 parameters must be assigned the value C{True} by the caller.
330
331 - C{preserve}: the text is returned unchanged.
332
333 - C{replace}: all tabs, newlines, and carriage returns are
334 replaced with ASCII spaces.
335
336 - C{collapse}: the C{replace} normalization is done, then
337 sequences of two or more spaces are replaced by a single space.
338
339 See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.
340
341 @rtype: C{str}
342 """
343 if preserve:
344 return text
345 text = __TabCRLF_re.sub(' ', text)
346 if replace:
347 return text
348 if collapse:
349 return __MultiSpace_re.sub(' ', text).strip()
350
351 raise Exception('NormalizeWhitespace: No normalization specified')
352
354 """Represent a directed graph with arbitrary objects as nodes.
355
356 This is used in the L{code
357 generator<pyxb.binding.generate.Generator>} to determine order
358 dependencies among components within a namespace, and schema that
359 comprise various namespaces. An edge from C{source} to C{target}
360 indicates that some aspect of C{source} requires that some aspect
361 of C{target} already be available.
362 """
363
365 self.__roots = None
366 if root is not None:
367 self.__roots = set([root])
368 self.__edges = set()
369 self.__edgeMap = { }
370 self.__reverseMap = { }
371 self.__nodes = set()
372
373 __scc = None
374 __sccMap = None
375 __dfsOrder = None
376
def addEdge(self, source, target):
    """Add a directed edge from the C{source} to the C{target}.

    Both endpoints are added to the graph's node set if necessary.

    A self-loop (C{source == target}) is recorded as an edge and in the
    forward edge map, but not in the reverse map.

    @param source: the node the edge leaves (must be hashable)
    @param target: the node the edge enters (must be hashable)
    """
    self.__edges.add((source, target))
    self.__edgeMap.setdefault(source, set()).add(target)
    if source != target:
        # Only edges arriving from a different node are tracked as
        # incoming edges.
        self.__reverseMap.setdefault(target, set()).add(source)
    self.__nodes.update((source, target))
388
390 """Add the given node to the graph."""
391 self.__nodes.add(node)
392
393 __roots = None
def roots(self, reset=False):
    """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

    This caches the roots calculated in a previous invocation
    unless the C{reset} keyword is given the value C{True}.

    @note: Upon reset the root set is rebuilt purely from the edge
    data, so any root that had been registered manually (rather than
    derived from the absence of incoming edges) will no longer be in
    the set.

    @keyword reset: If C{True}, any cached value is discarded and
    recomputed.  No effect if C{False} (default).

    @rtype: C{set}
    """
    if reset or (self.__roots is None):
        incoming = self.__reverseMap
        # A root is any known node that never appears as the target of
        # an edge from a different node.
        self.__roots = set(n for n in self.__nodes if n not in incoming)
    return self.__roots
415 """Add the provided node as a root node, even if it has incoming edges.
416
417 The node need not be present in the graph (if necessary, it is added).
418
419 Note that roots added in this way do not survive a reset using
420 L{roots}.
421
422 @return: C{self}
423 """
424 if self.__roots is None:
425 self.__roots = set()
426 self.__nodes.add(root)
427 self.__roots.add(root)
428 return self
429
431 """Return the edges in the graph.
432
433 The edge data structure is a map from the source node to the
434 set of nodes that can be reached in a single step from the
435 source.
436 """
437 return self.__edgeMap
438 __edgeMap = None
439
441 """Return the edges in the graph.
442
443 The edge data structure is a set of node pairs represented as C{( source, target )}.
444 """
445 return self.__edges
446
448 """Return the set of nodes in the graph.
449
450 The node collection data structure is a set containing node
451 objects, whatever they may be."""
452 return self.__nodes
453
def tarjan(self, reset=False):
    """Execute Tarjan's algorithm on the graph.

    U{Tarjan's
    algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
    computes the U{strongly-connected
    components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
    of the graph: i.e., the sets of nodes that form a minimal
    closed set under edge transition.  In essence, the loops.  We
    use this to detect groups of components that have a dependency
    cycle.

    The traversal is started from each node returned by L{roots}, so
    only nodes reachable from a root are visited.

    @keyword reset: If C{True}, any cached component set is erased
    and recomputed.  If C{False} (default), an existing previous
    result is left unchanged.

    @raise Exception: if the graph contains nodes but none of them is
    a root.
    """

    if (self.__scc is not None) and (not reset):
        return
    # Start every run from a clean working state.
    self.__sccMap = {}
    self.__stack = []
    self.__sccOrder = []
    self.__scc = []
    self.__index = 0
    # An index of None marks a node that has not been visited yet.
    self.__tarjanIndex = dict.fromkeys(self.__nodes)
    self.__tarjanLowLink = {}
    roots = self.roots()
    if (0 == len(roots)) and (0 < len(self.__nodes)):
        raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
    for r in roots:
        self._tarjan(r)
    self.__didTarjan = True
487
489 """Do the work of Tarjan's algorithm for a given root node."""
490 if self.__tarjanIndex.get(v) is not None:
491
492 return
493 self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
494 self.__index += 1
495 self.__stack.append(v)
496 source = v
497 for target in self.__edgeMap.get(source, []):
498 if self.__tarjanIndex[target] is None:
499 self._tarjan(target)
500 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
501 elif target in self.__stack:
502 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
503 else:
504 pass
505
506 if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
507 scc = []
508 while True:
509 scc.append(self.__stack.pop())
510 if v == scc[-1]:
511 break
512 self.__sccOrder.append(scc)
513 if 1 < len(scc):
514 self.__scc.append(scc)
515 [ self.__sccMap.setdefault(_v, scc) for _v in scc ]
516
def scc(self, reset=False):
    """Return the strongly-connected components of the graph.

    The data structure is a list, each element of which is itself a
    list of the nodes making up one component.  Only components with
    more than one node are recorded here.

    @keyword reset: If C{True}, the L{tarjan} method is re-invoked,
    propagating the C{reset} value.  If C{False} (default), a cached
    value is returned if available.

    @see: L{tarjan}.
    """
    if reset or (self.__scc is None):
        self.tarjan(reset)
    return self.__scc
528 __scc = None
529
def sccMap(self, reset=False):
    """Return a map from nodes to the strongly-connected component
    to which the node belongs.

    The map is the one populated by L{tarjan}; its values are the
    component lists built during that run.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), a cached value will be returned if available.

    @rtype: C{dict}

    @see: L{tarjan}.
    """
    if reset or (self.__sccMap is None):
        self.tarjan(reset)
    return self.__sccMap
543 __sccMap = None
544
546 """Return the strongly-connected components in order.
547
548 The data structure is a list, in dependency order, of strongly
549 connected components (which can be single nodes). Appearance
550 of a node in a set earlier in the list indicates that it has
551 no dependencies on any node that appears in a subsequent set.
552 This order is preferred over L{dfsOrder} for code generation,
553 since it detects loops.
554
555 @see: L{tarjan}.
556 """
557 if reset or (self.__sccOrder is None):
558 self.tarjan(reset)
559 return self.__sccOrder
560 __sccOrder = None
561
563 """Return the strongly-connected component to which the given
564 node belongs.
565
566 Any keywords supplied when invoking this method are passed to
567 the L{sccMap} method.
568
569 @return: The SCC set, or C{None} if the node is not present in
570 the results of Tarjan's algorithm."""
571
572 return self.sccMap(**kw).get(node)
573
575 """Return the cyclomatic complexity of the graph."""
576 self.tarjan()
577 return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)
578
580 assert not (source in self.__dfsWalked)
581 self.__dfsWalked.add(source)
582 for target in self.__edgeMap.get(source, []):
583 if not (target in self.__dfsWalked):
584 self.__dfsWalk(target)
585 self.__dfsOrder.append(source)
586
588 node_map = { }
589 idx = 1
590 for n in self.__nodes:
591 node_map[n] = idx
592 idx += 1
593 text = []
594 text.append('digraph "%s" {' % (title,))
595 for n in self.__nodes:
596 if labeller is not None:
597 nn = labeller(n)
598 else:
599 nn = str(n)
600 text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
601 for s in self.__nodes:
602 for d in self.__edgeMap.get(s, []):
603 if s != d:
604 text.append('%s -> %s;' % (node_map[s], node_map[d]))
605 text.append("};")
606 return "\n".join(text)
607
609 """Return the nodes of the graph in U{depth-first-search
610 order<http://en.wikipedia.org/wiki/Depth-first_search>}.
611
612 The data structure is a list. Calculated lists are retained
613 and returned on future invocations, subject to the C{reset}
614 keyword.
615
616 @keyword reset: If C{True}, discard cached results and recompute the order."""
617 if reset or (self.__dfsOrder is None):
618 self.__dfsWalked = set()
619 self.__dfsOrder = []
620 for root in self.roots(reset=reset):
621 self.__dfsWalk(root)
622 self.__dfsWalked = None
623 if len(self.__dfsOrder) != len(self.__nodes):
624 raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
625 return self.__dfsOrder
626
628 """Return the nodes of the graph as a sequence of root sets.
629
630 The first root set is the set of nodes that are roots: i.e.,
631 have no incoming edges. The second root set is the set of
632 nodes that have incoming nodes in the first root set. This
633 continues until all nodes have been reached. The sets impose
634 a partial order on the nodes, without being as constraining as
635 L{sccOrder}.
636
637 @return: a list of the root sets."""
638 order = []
639 nodes = set(self.__nodes)
640 edge_map = {}
641 for (d, srcs) in self.__edgeMap.iteritems():
642 edge_map[d] = srcs.copy()
643 while nodes:
644 freeset = set()
645 for n in nodes:
646 if not (n in edge_map):
647 freeset.add(n)
648 if 0 == len(freeset):
649 _log.error('dependency cycle in named components')
650 return None
651 order.append(freeset)
652 nodes.difference_update(freeset)
653 new_edge_map = {}
654 for (d, srcs) in edge_map.iteritems():
655 srcs.difference_update(freeset)
656 if 0 != len(srcs):
657 new_edge_map[d] = srcs
658 edge_map = new_edge_map
659 return order
660
661 LocationPrefixRewriteMap_ = { }
668
670 """Normalize a URI against an optional parent_uri in the way that is
671 done for C{schemaLocation} attribute values.
672
673 If no URI schema is present, this will normalize a file system
674 path.
675
676 Optionally, the resulting absolute URI can subsequently be
677 rewritten to replace specified prefix strings with alternative
678 strings, e.g. to convert a remote URI to a local repository. This
679 rewriting is done after the conversion to an absolute URI, but
680 before normalizing file system URIs.
681
682 @param uri : The URI to normalize. If C{None}, function returns
683 C{None}
684
685 @param parent_uri : The base URI against which normalization is
686 done, if C{uri} is a relative URI.
687
688 @param prefix_map : A map used to rewrite URI prefixes. If
689 C{None}, the value defaults to that stored by
690 L{SetLocationPrefixRewriteMap}.
691
692 """
693 if uri is None:
694 return uri
695 if parent_uri is None:
696 abs_uri = uri
697 else:
698
699
700 abs_uri = urlparse.urljoin(parent_uri, uri)
701 if prefix_map is None:
702 prefix_map = LocationPrefixRewriteMap_
703 for (pfx, sub) in prefix_map.iteritems():
704 if abs_uri.startswith(pfx):
705 abs_uri = sub + abs_uri[len(pfx):]
706 if 0 > abs_uri.find(':'):
707 abs_uri = os.path.realpath(abs_uri)
708 return abs_uri
709
712 """Retrieve the contents of the uri as raw data.
713
714 If the uri does not include a scheme (e.g., C{http:}), it is
715 assumed to be a file path on the local system."""
716 import urllib
717 import urllib2
718 stream = None
719 exc = None
720
721
722
723 if 0 <= uri.find(':'):
724 try:
725 stream = urllib2.urlopen(uri)
726 except Exception as e:
727 exc = e
728 if stream is None:
729 try:
730 stream = urllib.urlopen(uri)
731 exc = None
732 except:
733
734 pass
735 if stream is None:
736
737 try:
738 stream = open(uri, 'rb')
739 exc = None
740 except Exception as e:
741 if exc is None:
742 exc = e
743 if exc is not None:
744 _log.error('open %s', uri, exc_info=exc)
745 raise exc
746 try:
747
748
749 if isinstance(stream, file) or isinstance(stream.fp, file):
750 archive_directory = None
751 except:
752 pass
753 xmld = stream.read()
754 if archive_directory:
755 base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
756 counter = 1
757 dest_file = os.path.join(archive_directory, base_name)
758 while os.path.isfile(dest_file):
759 dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
760 counter += 1
761 try:
762 OpenOrCreate(dest_file).write(xmld)
763 except OSError as e:
764 _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
765 return xmld
766
def OpenOrCreate(file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories leading to C{file_name} are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path of the file to open; it is opened in binary
    append/update mode (C{'ab+'}).

    @keyword tag: If not C{None} and the file already exists and is
    non-empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8) causes an C{OSError} to be raised with
    C{errno} set to C{EEXIST}.  I.e., only files with this value in
    the first 4KB will be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: with C{errno.EEXIST} when the tag check fails; the
    file is closed and left unmodified in that case.
    """
    (path, _leaf) = os.path.split(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            fp.seek(0)
            # The 4096-byte window may end in the middle of a multi-byte
            # character; decode leniently so that cannot abort the check.
            blockt = fp.read(4096).decode('utf-8', 'replace')
            if 0 > blockt.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0)
            fp.truncate()
        else:
            # Seek relative to the end of the file, not to byte offset 2.
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp
809
810
# Select the SHA-1 constructor: hashlib where available, otherwise the
# legacy sha module.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText(text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    @param text: the data to hash; Unicode text is encoded as UTF-8
    first, anything else is handed to the hasher unchanged.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    # type(u'') is the Unicode text type on both Python 2 and Python 3,
    # which avoids naming the Python 2-only built-in directly.
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()
832
833
834 __HaveUUID = False
835 try:
836 import uuid
837 __HaveUUID = True
838 except ImportError:
839 import random
841 """Obtain a UUID using the best available method. On a version of
842 python that does not incorporate the C{uuid} class, this creates a
843 string combining the current date and time (to the second) with a
844 random number.
845
846 @rtype: C{str}
847 """
848 if __HaveUUID:
849 return uuid.uuid1().urn
850 return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFFL))
851
853 """Records a unique identifier, generally associated with a
854 binding generation action.
855
856 The identifier is a string, but gets wrapped in an instance of
857 this class to optimize comparisons and reduce memory footprint.
858
859 Invoking the constructor for this class on the same string
860 multiple times will return the same Python object.
861
862 An instance of this class compares equal to, and hashes equivalent
863 to, the uid string. When C{str}'d, the result is the uid; when
864 C{repr}'d, the result is a constructor call to
865 C{pyxb.utils.utility.UniqueIdentifier}.
866 """
867
868
869 __ExistingUIDs = {}
870
872 """The string unique identifier"""
873 return self.__uid
874 __uid = None
875
876
879
882
885
886
902
904 """Associate the given object with this identifier.
905
906 This is a one-way association: the object is not provided with
907 a return path to this identifier instance."""
908 self.__associatedObjects.add(obj)
910 """The set of objects that have been associated with this
911 identifier instance."""
912 return self.__associatedObjects
913 __associatedObjects = None
914
916 """Create a new UniqueIdentifier instance.
917
918 @param uid: The unique identifier string. If present, it is
919 the caller's responsibility to ensure the value is universally
920 unique. If C{None}, one will be provided.
921 @type uid: C{str} or C{unicode}
922 """
923 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
924 self.__associatedObjects = set()
925
927 if other is None:
928 return False
929 elif isinstance(other, UniqueIdentifier):
930 other_uid = other.uid()
931 elif isinstance(other, basestring):
932 other_uid = other
933 else:
934 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
935 return self.uid() == other_uid
936
938 return hash(self.uid())
939
942
944 return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)
945
948 """A C{datetime.tzinfo} subclass that helps deal with UTC
949 conversions in an ISO8601 world.
950
951 This class only supports fixed offsets from UTC.
952 """
953
954
955 __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$')
956
957
958 __utcOffset_min = 0
959
960
961 __utcOffset_td = None
962
963
964 __ZeroDuration = datetime.timedelta(0)
965
966
967 __MaxOffset_td = datetime.timedelta(hours=14)
968
970 """Create a time zone instance with a fixed offset from UTC.
971
972 @param spec: Specifies the offset. Can be an integer counting
973 minutes east of UTC, the value C{None} (equal to 0 minutes
974 east), or a string that conforms to the ISO8601 time zone
975 sequence (B{Z}, or B{[+-]HH:MM}).
976 """
977
978 if spec is not None:
979 if isinstance(spec, basestring):
980 if 'Z' == spec:
981 self.__utcOffset_min = 0
982 else:
983 match = self.__Lexical_re.match(spec)
984 if match is None:
985 raise ValueError('Bad time zone: %s' % (spec,))
986 self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
987 if '-' == match.group(1):
988 self.__utcOffset_min = - self.__utcOffset_min
989 elif isinstance(spec, int):
990 self.__utcOffset_min = spec
991 elif isinstance(spec, datetime.timedelta):
992 self.__utcOffset_min = spec.seconds // 60
993 else:
994 raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
995 self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
996 if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
997 raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
998 if 0 == self.__utcOffset_min:
999 self.__tzName = 'Z'
1000 elif 0 > self.__utcOffset_min:
1001 self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
1002 else:
1003 self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)
1004
1006 """Returns the constant offset for this zone."""
1007 return self.__utcOffset_td
1008
1010 """Return the name of the timezone in the format expected by XML Schema."""
1011 return self.__tzName
1012
def dst(self, dt):
    """Returns a constant zero duration.

    @param dt: the date/time being queried; it is not consulted.
    """
    return self.__ZeroDuration
1016
1021
1024
1027
1030
1032 """A C{datetime.tzinfo} subclass for the local time zone.
1033
1034 Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
1035 """
1036
1037 __STDOffset = datetime.timedelta(seconds=-time.timezone)
1038 __DSTOffset = __STDOffset
1039 if time.daylight:
1040 __DSTOffset = datetime.timedelta(seconds=-time.altzone)
1041 __ZeroDelta = datetime.timedelta(0)
1042 __DSTDelta = __DSTOffset - __STDOffset
1043
1048
1049 - def dst (self, dt):
1053
1056
1058 tt = (dt.year, dt.month, dt.day,
1059 dt.hour, dt.minute, dt.second,
1060 0, 0, -1)
1061 tt = time.localtime(time.mktime(tt))
1062 return tt.tm_isdst > 0
1063
1065 """Emulate the B{transient} keyword from Java for private member
1066 variables.
1067
1068 This class defines a C{__getstate__} method which returns a copy
1069 of C{self.__dict__} with certain members removed. Specifically,
1070 if a string "s" appears in a class member variable named
1071 C{__PrivateTransient} defined in the "Class" class, then the
1072 corresponding private variable "_Class__s" will be removed from
1073 the state dictionary. This is used to eliminate unnecessary
1074 fields from instances placed in L{namespace
1075 archives<pyxb.namespace.archive.NamespaceArchive>} without having
1076 to implement a C{__getstate__} method in every class in the
1077 instance hierarchy.
1078
1079 For an example, see
1080 L{pyxb.xmlschema.structures._SchemaComponent_mixin}
1081
1082 If you use this, it is your responsibility to define the
1083 C{__PrivateTransient} class variable and add to it the required
1084 variable names.
1085
1086 Classes that inherit from this are free to define their own
1087 C{__getstate__} method, which may or may not invoke the superclass
1088 one. If you do this, be sure that the class defining
1089 C{__getstate__} lists L{PrivateTransient_mixin} as one of its
1090 direct superclasses, lest the latter end up earlier in the mro and
1091 consequently bypass the local override.
1092 """
1093
1094
1095
1096 __Attribute = '__PrivateTransient'
1097
1099 state = self.__dict__.copy()
1100
1101
1102 attr = '_%s%s_' % (self.__class__.__name__, self.__Attribute)
1103 skipped = getattr(self.__class__, attr, None)
1104 if skipped is None:
1105 skipped = set()
1106 for cl in self.__class__.mro():
1107 for (k, v) in cl.__dict__.iteritems():
1108 if k.endswith(self.__Attribute):
1109 cl2 = k[:-len(self.__Attribute)]
1110 skipped.update([ '%s__%s' % (cl2, _n) for _n in v ])
1111 setattr(self.__class__, attr, skipped)
1112 for k in skipped:
1113 if state.get(k) is not None:
1114 del state[k]
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125 return state
1126
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Collect the files found along a search path that satisfy an
    optional name filter.

    A typical use is locating namespace archive files along the
    archive path supplied by the user.  On a POSIX platform::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile(r'.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    yields every C{.wxs} file found recursively beneath
    C{/opt/pyxb/bundles}, followed by those located directly inside
    C{/usr/local/pyxb/nsarchives}.

    @param path: The search path: a string of entries separated by
    C{os.pathsep}.  An entry ending in C{//} (on every platform) has
    that suffix removed and is scanned recursively; any other
    directory entry contributes only the files directly inside it.
    An entry that names a regular file contributes that file itself.

    @keyword pattern: Optional compiled regular expression.  When
    provided, a file is kept only if C{pattern.search} succeeds.  The
    search is applied to the bare file name for files discovered by
    scanning a directory, and to the whole entry for an entry that is
    itself a file.  When C{None}, every file is kept.

    @keyword default_path_wildcard: Optional string.  A path entry
    exactly equal to it is replaced by the entries of
    C{default_path}.

    @keyword default_path: The C{os.pathsep}-separated path that is
    substituted for C{default_path_wildcard}.  The substitution is
    performed at most once; further occurrences of the wildcard, or
    any occurrence when C{default_path} is C{None}, are dropped.

    @keyword prefix_pattern: Optional string.  When an entry starts
    with it, that leading text is removed and the remainder is joined
    onto C{prefix_substituend} with C{os.path.join}.

    @keyword prefix_substituend: The path prefix that takes the place
    of C{prefix_pattern}.

    @return: A list of file paths, in the order the path entries were
    processed.  Paths are built from the entries as given, so they
    are absolute only when the corresponding entry is.
    """

    def _accept (candidate):
        # Without a pattern everything qualifies.
        return (pattern is None) or (pattern.search(candidate) is not None)

    found = []
    pending = path.split(os.pathsep)
    while pending:
        entry = pending.pop(0)
        if default_path_wildcard == entry:
            if default_path is not None:
                # Splice the default entries in at the front, then forget
                # the default so the wildcard cannot expand a second time.
                pending[0:0] = default_path.split(os.pathsep)
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if _accept(entry):
                found.append(entry)
            continue
        for (root, _subdirs, names) in os.walk(entry):
            found.extend(os.path.join(root, name) for name in names if _accept(name))
            if not recursive:
                # Only the top-level directory is wanted.
                break
    return found
1199
1200 @BackfillComparisons
1201 -class Location (object):
1202 __locationBase = None
1203 __lineNumber = None
1204 __columnNumber = None
1205
def __init__ (self, location_base=None, line_number=None, column_number=None):
    """Record the three components of a location.

    @keyword location_base: Identifies what the line and column are
    relative to.  A C{str} value is passed through C{intern} before
    being stored; any other value is stored unchanged.
    @keyword line_number: Stored as the line number.
    @keyword column_number: Stored as the column number.
    """
    self.__lineNumber = line_number
    self.__columnNumber = column_number
    # Only plain str is interned; every other value is kept as given.
    self.__locationBase = intern(location_base) if isinstance(location_base, str) else location_base
1212
def newLocation (self, locator=None, line_number=None, column_number=None):
    """Create a new L{Location} that shares this instance's location
    base but has its own line and column.

    @keyword locator: Optional object queried through
    C{getLineNumber()} and C{getColumnNumber()}.  When it is not
    C{None}, the values it reports take the place of C{line_number}
    and C{column_number}.  NOTE(review): presumably a SAX-style
    locator; confirm against callers.
    @keyword line_number: Line number to use when no locator value is
    available.
    @keyword column_number: Column number to use when no locator value
    is available.
    @return: A new L{Location} instance.
    """
    if locator is not None:
        try:
            line_number = locator.getLineNumber()
            column_number = locator.getColumnNumber()
        except Exception:
            # Best effort: a locator that cannot answer leaves whatever
            # values have been established so far.  Catching Exception
            # rather than everything lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently discarded.
            pass
    return Location(self.__locationBase, line_number, column_number)
1221
# Read-only accessors for the private fields assigned in the constructor;
# no setter is supplied, so assignment through these names is rejected.
locationBase = property(lambda _self: _self.__locationBase)
lineNumber = property(lambda _self: _self.__lineNumber)
columnNumber = property(lambda _self: _self.__columnNumber)
1225
1227 if v1 is None:
1228 if v2 is None:
1229 return None
1230 return 1
1231 if v2 is None:
1232 return -1
1233 if v1 < v2:
1234 return -1
1235 if v1 == v2:
1236 return 0
1237 return 1
1238
1246
1249
1251 """Comparison by locationBase, then lineNumber, then columnNumber."""
1252 if other is None:
1253 return False
1254 rv = self.__cmpTupleUnlessNone(self, other)
1255 if rv is None:
1256 return True
1257 return 0 == rv
1258
1260 if other is None:
1261 return False
1262 rv = self.__cmpTupleUnlessNone(self, other)
1263 if rv is None:
1264 return False
1265 return -1 == rv
1266
1275
1280
1293