1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Utility functions and classes."""
17
18 import re
19 import os
20 import errno
21 import pyxb
22 from pyxb.utils.six.moves.urllib import parse as urlparse
23 import time
24 import datetime
25 import logging
26 from pyxb.utils import six
27
28 _log = logging.getLogger(__name__)
31 """A dummy class used to hold arbitrary attributes.
32
33 Essentially this gives us a map without having to worry about
34 converting names to text to use as keys.
35 """
36 pass
37
39 """Class decorator that fills in missing ordering methods.
40
41 Concept derived from Python 2.7.5 functools.total_ordering,
42 but this version requires that __eq__ and __lt__ be provided,
43 and unconditionally overrides __ne__, __gt__, __le__, and __ge__
44 with the derived versions.
45
46 This is still necessary in Python 3 because in Python 3 the
47 comparison x >= y is done by the __ge__ inherited from object,
48 which does not handle the case where x and y are not the same type
49 even if the underlying y < x would convert x to be compatible. """
50
51 def applyconvert (cls, derived):
52 for (opn, opx) in derived:
53 opx.__name__ = opn
54 opx.__doc__ = getattr(int, opn).__doc__
55 setattr(cls, opn, opx)
56
57 applyconvert(cls, (
58 ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))),
59 ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)),
60 ('__ge__', lambda self, other: not self.__lt__(other))
61 ))
62 applyconvert(cls, (
63 ('__ne__', lambda self, other: not self.__eq__(other)),
64 ))
65 return cls
66
68 """Tuple comparison that permits C{None} as lower than any value,
69 and defines other cross-type comparison.
70
71 @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
72 li = iter(lhs)
73 ri = iter(rhs)
74 while True:
75 try:
76 (lv, rv) = (next(li), next(ri))
77 if lv is None:
78 if rv is None:
79 continue
80 return -1
81 if rv is None:
82 return 1
83 if lv == rv:
84 continue
85 if lv < rv:
86 return -1
87 return 1
88 except StopIteration:
89 nl = len(lhs)
90 nr = len(rhs)
91 if nl < nr:
92 return -1
93 if nl == nr:
94 return 0
95 return 1
96
98 """Convert a string into a literal value that can be used in Python source.
99
100 This just calls C{repr}. No point in getting all complex when the language
101 already gives us what we need.
102
103 @rtype: C{str}
104 """
105 return repr(s)
106
108 """Default implementation for _XMLIdentifierToPython
109
110 For historical reasons, this converts the identifier from a str to
111 unicode in the system default encoding. This should have no
112 practical effect.
113
114 @param identifier : some XML identifier
115
116 @return: C{unicode(identifier)}
117 """
118
119 return six.text_type(identifier)
120
122 """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier.
123
124 In Python3, identifiers can be full Unicode tokens, but in Python2,
125 all identifiers must be ASCII characters. L{MakeIdentifier} enforces
126 this by removing all characters that are not valid within an
127 identifier.
128
129 In some cases, an application generating bindings may be able to
130 transliterate Unicode code points that are not valid Python identifier
131 characters into something else. This callable can be assigned to
132 perform that translation before the invalid characters are
133 stripped.
134
135 It is not the responsibility of this callable to do anything other
136 than replace whatever characters it wishes to. All
137 transformations performed by L{MakeIdentifier} will still be
138 applied, to ensure the output is in fact a legal identifier.
139
140 @param xml_identifier_to_python : A callable that takes a string
141 and returns a Unicode, possibly with non-identifier characters
142 replaced by other characters. Pass C{None} to reset to the
143 default implementation, which is L{_DefaultXMLIdentifierToPython}.
144
145 @rtype: C{unicode}
146 """
147 global _XMLIdentifierToPython
148 if xml_identifier_to_python is None:
149 xml_identifier_to_python = _DefaultXMLIdentifierToPython
150 _XMLIdentifierToPython = xml_identifier_to_python
151
152 _XMLIdentifierToPython = _DefaultXMLIdentifierToPython
153
154 _UnderscoreSubstitute_re = re.compile(r'[- .]')
155 _NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
156 _PrefixUnderscore_re = re.compile(r'^_+')
157 _PrefixDigit_re = re.compile(r'^\d+')
158 _CamelCase_re = re.compile(r'_\w')
161 """Convert a string into something suitable to be a Python identifier.
162
163 The string is processed by L{_XMLIdentifierToPython}. Following
164 this, dashes, spaces, and periods are replaced by underscores, and
165 characters not permitted in Python identifiers are stripped.
166 Furthermore, any leading underscores are removed. If the result
167 begins with a digit, the character 'n' is prepended. If the
168 result is the empty string, the string 'emptyString' is
169 substituted.
170
171 No check is made for L{conflicts with keywords <DeconflictKeyword>}.
172
173 @keyword camel_case : If C{True}, any underscore in the result
174 string that is immediately followed by an alphanumeric is replaced
175 by the capitalized version of that alphanumeric. Thus,
176 'one_or_two' becomes 'oneOrTwo'. If C{False} (default), has no
177 effect.
178
179 @rtype: C{str}
180 """
181 s = _XMLIdentifierToPython(s)
182 s = _PrefixUnderscore_re.sub('', _NonIdentifier_re.sub('', _UnderscoreSubstitute_re.sub('_', s)))
183 if camel_case:
184 s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s)
185 if _PrefixDigit_re.match(s):
186 s = 'n' + s
187 if 0 == len(s):
188 s = 'emptyString'
189 return s
190
192 """Convert a string into something that can be a valid element in a
193 Python module path.
194
195 Module path elements are similar to identifiers, but may begin
196 with numbers and should not have leading underscores removed.
197 """
198 return _UnderscoreSubstitute_re.sub('_', _XMLIdentifierToPython(s))
199
_PythonKeywords = frozenset((
    # Keywords common to Python 2 and Python 3.
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass",
    "raise", "return", "try", "while", "with", "yield",
    # Keywords only in Python 2.
    "exec", "print",
    # Keywords only in Python 3; an identifier with one of these names
    # is a syntax error there, so it must be deconflicted as well.
    "nonlocal", "async", "await",
))
"""Python keywords, covering both Python 2 and Python 3.  Note that
types like int and float are not keywords.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset((
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    # NOTE(review): "set" is neither a keyword nor a constant; presumably
    # it is reserved so that a generated symbol cannot hide the builtin
    # constructor -- confirm.
    "set",
))
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""
227 """If the provided string C{s} matches a Python language keyword,
228 append an underscore to distinguish them.
229
230 See also L{MakeUnique}.
231
232 @param s: string to be deconflicted
233
234 @keyword aux_keywords: optional iterable of additional strings
235 that should be treated as keywords.
236
237 @rtype: C{str}
238
239 """
240 if (s in _Keywords) or (s in aux_keywords):
241 return '%s_' % (s,)
242 return s
243
245 """Return an identifier based on C{s} that is not in the given set.
246
247 The returned identifier is made unique by appending an underscore
248 and, if necessary, a serial number.
249
250 The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...
251
252 @param in_use: The set of identifiers already in use in the
253 relevant scope. C{in_use} is updated to contain the returned
254 identifier.
255
256 @rtype: C{str}
257 """
258 if s in in_use:
259 ctr = 2
260 s = s.rstrip('_')
261 candidate = '%s_' % (s,)
262 while candidate in in_use:
263 candidate = '%s_%d' % (s, ctr)
264 ctr += 1
265 s = candidate
266 in_use.add(s)
267 return s
268
def PrepareIdentifier(s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    The input is converted with L{MakeIdentifier}, leading and trailing
    underscores are stripped, the result is passed through
    L{DeconflictKeyword}, the private/protected prefix (if any) is
    applied, and finally the name is made unique with L{MakeUnique}.

    @param s: the string from which the identifier is derived.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is also C{True}.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    s = DeconflictKeyword(MakeIdentifier(s).strip('_'), aux_keywords)
    # The visibility prefix is added after keyword deconfliction, so the
    # keyword check always sees the bare name.
    if private:
        s = '__' + s
    elif protected:
        s = '_' + s
    return MakeUnique(s, in_use)
304
307 """Mix-in used to deconflict public symbols in classes that may be
308 inherited by generated binding classes.
309
310 Some classes, like the L{pyxb.binding.basis.element} or
311 L{pyxb.binding.basis.simpleTypeDefinition} classes in
312 L{pyxb.binding.basis}, have public symbols associated with
313 functions and variables. It is possible that an XML schema might
314 include tags and attribute names that match these symbols. To
315 avoid conflict, the reserved symbols marked in this class are
316 added to the pre-defined identifier set.
317
318 Subclasses should create a class-level variable
319 C{_ReservedSymbols} that contains a set of strings denoting the
320 symbols reserved in this class, combined with those from any
321 superclasses that also have reserved symbols. Code like the
322 following is suggested::
323
324 # For base classes (direct mix-in):
325 _ReservedSymbols = set([ 'one', 'two' ])
326 # For subclasses:
327 _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))
328
329 Only public symbols (those with no underscores) are currently
330 supported. (Private symbols can't be deconflicted that easily,
331 and no protected symbols that derive from the XML are created by
332 the binding generator.)
333 """
334
335 _ReservedSymbols = set()
336 """There are no reserved symbols in the base class."""
337
338
339 __TabCRLF_re = re.compile("[\t\n\r]")
340
341 __MultiSpace_re = re.compile(" +")
344 """Normalize the given string.
345
346 Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
347 parameters must be assigned the value C{True} by the caller.
348
349 - C{preserve}: the text is returned unchanged.
350
351 - C{replace}: all tabs, newlines, and carriage returns are
352 replaced with ASCII spaces.
353
354 - C{collapse}: the C{replace} normalization is done, then
355 sequences of two or more spaces are replaced by a single space.
356
357 See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.
358
359 @rtype: C{str}
360 """
361 if preserve:
362 return text
363 text = __TabCRLF_re.sub(' ', text)
364 if replace:
365 return text
366 if collapse:
367 return __MultiSpace_re.sub(' ', text).strip()
368
369 raise Exception('NormalizeWhitespace: No normalization specified')
370
372 """Represent a directed graph with arbitrary objects as nodes.
373
374 This is used in the L{code
375 generator<pyxb.binding.generate.Generator>} to determine order
376 dependencies among components within a namespace, and schema that
377 comprise various namespaces. An edge from C{source} to C{target}
378 indicates that some aspect of C{source} requires that some aspect
379 of C{target} already be available.
380 """
381
383 self.__roots = None
384 if root is not None:
385 self.__roots = set([root])
386 self.__edges = set()
387 self.__edgeMap = { }
388 self.__reverseMap = { }
389 self.__nodes = set()
390
391 __scc = None
392 __sccMap = None
393 __dfsOrder = None
394
def addEdge(self, source, target):
    """Add a directed edge from the C{source} to the C{target}.

    The nodes are added to the graph if necessary.

    @param source: the node at which the edge starts.
    @param target: the node at which the edge ends.
    """
    self.__edges.add((source, target))
    self.__edgeMap.setdefault(source, set()).add(target)
    if source != target:
        # Self-loops are left out of the reverse map: root detection
        # tests for absence from that map, so a node whose only
        # incoming edge is from itself still counts as a root.
        self.__reverseMap.setdefault(target, set()).add(source)
    self.__nodes.add(source)
    self.__nodes.add(target)
406
408 """Add the given node to the graph."""
409 self.__nodes.add(node)
410
411 __roots = None
def roots(self, reset=False):
    """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

    This caches the roots calculated in a previous invocation
    unless the C{reset} keyword is given the value C{True}.

    @note: Upon reset the set is recomputed purely from the graph
    structure, so any node that had been manually placed in the root
    set while having incoming edges will no longer be in the set.

    @keyword reset: If C{True}, any cached value is discarded and
    recomputed.  No effect if C{False} (default).

    @rtype: C{set}
    """
    if reset or (self.__roots is None):
        # A node is a root when nothing points at it, i.e. it never
        # appears as a key of the reverse (target -> sources) map.
        self.__roots = set(n for n in self.__nodes if n not in self.__reverseMap)
    return self.__roots
433 """Add the provided node as a root node, even if it has incoming edges.
434
435 The node need not be present in the graph (if necessary, it is added).
436
437 Note that roots added in this way do not survive a reset using
438 L{roots}.
439
440 @return: C{self}
441 """
442 if self.__roots is None:
443 self.__roots = set()
444 self.__nodes.add(root)
445 self.__roots.add(root)
446 return self
447
449 """Return the edges in the graph.
450
451 The edge data structure is a map from the source node to the
452 set of nodes that can be reached in a single step from the
453 source.
454 """
455 return self.__edgeMap
456 __edgeMap = None
457
459 """Return the edges in the graph.
460
461 The edge data structure is a set of node pairs represented as C{( source, target )}.
462 """
463 return self.__edges
464
466 """Return the set of nodes in the graph.
467
468 The node collection data structure is a set containing node
469 objects, whatever they may be."""
470 return self.__nodes
471
def tarjan(self, reset=False):
    """Execute Tarjan's algorithm on the graph.

    U{Tarjan's
    algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
    computes the U{strongly-connected
    components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
    of the graph: i.e., the sets of nodes that form a minimal
    closed set under edge transition.  In essence, the loops.  We
    use this to detect groups of components that have a dependency
    cycle.

    The walk starts from each node returned by L{roots}; the results
    are stored on the instance rather than returned.

    @keyword reset: If C{True}, any cached component set is erased
    and recomputed.  If C{False} (default), an existing previous
    result is left unchanged.

    @raise Exception: if the graph has nodes but no roots."""

    if (self.__scc is not None) and (not reset):
        return
    # Fresh working state for this run.
    self.__sccMap = {}
    self.__stack = []
    self.__sccOrder = []
    self.__scc = []
    self.__index = 0
    # An index of None marks a node as not yet visited.
    self.__tarjanIndex = dict.fromkeys(self.__nodes)
    self.__tarjanLowLink = {}
    roots = self.roots()
    if (not roots) and self.__nodes:
        raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
    for r in roots:
        self._tarjan(r)
    self.__didTarjan = True
505
507 """Do the work of Tarjan's algorithm for a given root node."""
508 if self.__tarjanIndex.get(v) is not None:
509
510 return
511 self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
512 self.__index += 1
513 self.__stack.append(v)
514 source = v
515 for target in self.__edgeMap.get(source, []):
516 if self.__tarjanIndex[target] is None:
517 self._tarjan(target)
518 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
519 elif target in self.__stack:
520 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
521 else:
522 pass
523
524 if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
525 scc = []
526 while True:
527 scc.append(self.__stack.pop())
528 if v == scc[-1]:
529 break
530 self.__sccOrder.append(scc)
531 if 1 < len(scc):
532 self.__scc.append(scc)
533 [ self.__sccMap.setdefault(_v, scc) for _v in scc ]
534
def scc(self, reset=False):
    """Return the strongly-connected components of the graph.

    The data structure is a list, each element of which is itself a
    list of the nodes in one component.  Only components that
    contain more than one node are recorded here.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), a cached value will be returned if available.

    @see: L{tarjan}.
    """
    if reset or (self.__scc is None):
        self.tarjan(reset)
    return self.__scc
546 __scc = None
547
def sccMap(self, reset=False):
    """Return a map from nodes to the strongly-connected component
    to which the node belongs.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), a cached value will be returned if available.

    @rtype: C{dict}

    @see: L{tarjan}.
    """
    if reset or (self.__sccMap is None):
        self.tarjan(reset)
    return self.__sccMap
561 __sccMap = None
562
564 """Return the strongly-connected components in order.
565
566 The data structure is a list, in dependency order, of strongly
567 connected components (which can be single nodes). Appearance
568 of a node in a set earlier in the list indicates that it has
569 no dependencies on any node that appears in a subsequent set.
570 This order is preferred over L{dfsOrder} for code generation,
571 since it detects loops.
572
573 @see: L{tarjan}.
574 """
575 if reset or (self.__sccOrder is None):
576 self.tarjan(reset)
577 return self.__sccOrder
578 __sccOrder = None
579
581 """Return the strongly-connected component to which the given
582 node belongs.
583
584 Any keywords supplied when invoking this method are passed to
585 the L{sccMap} method.
586
587 @return: The SCC set, or C{None} if the node is not present in
588 the results of Tarjan's algorithm."""
589
590 return self.sccMap(**kw).get(node)
591
593 """Return the cyclomatic complexity of the graph."""
594 self.tarjan()
595 return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)
596
598 assert not (source in self.__dfsWalked)
599 self.__dfsWalked.add(source)
600 for target in self.__edgeMap.get(source, []):
601 if not (target in self.__dfsWalked):
602 self.__dfsWalk(target)
603 self.__dfsOrder.append(source)
604
606 node_map = { }
607 idx = 1
608 for n in self.__nodes:
609 node_map[n] = idx
610 idx += 1
611 text = []
612 text.append('digraph "%s" {' % (title,))
613 for n in self.__nodes:
614 if labeller is not None:
615 nn = labeller(n)
616 else:
617 nn = str(n)
618 text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
619 for s in self.__nodes:
620 for d in self.__edgeMap.get(s, []):
621 if s != d:
622 text.append('%s -> %s;' % (node_map[s], node_map[d]))
623 text.append("};")
624 return "\n".join(text)
625
627 """Return the nodes of the graph in U{depth-first-search
628 order<http://en.wikipedia.org/wiki/Depth-first_search>}.
629
630 The data structure is a list. Calculated lists are retained
631 and returned on future invocations, subject to the C{reset}
632 keyword.
633
634 @keyword reset: If C{True}, discard cached results and recompute the order."""
635 if reset or (self.__dfsOrder is None):
636 self.__dfsWalked = set()
637 self.__dfsOrder = []
638 for root in self.roots(reset=reset):
639 self.__dfsWalk(root)
640 self.__dfsWalked = None
641 if len(self.__dfsOrder) != len(self.__nodes):
642 raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
643 return self.__dfsOrder
644
646 """Return the nodes of the graph as a sequence of root sets.
647
648 The first root set is the set of nodes that are roots: i.e.,
649 have no incoming edges. The second root set is the set of
650 nodes that have incoming nodes in the first root set. This
651 continues until all nodes have been reached. The sets impose
652 a partial order on the nodes, without being as constraining as
653 L{sccOrder}.
654
655 @return: a list of the root sets."""
656 order = []
657 nodes = set(self.__nodes)
658 edge_map = {}
659 for (d, srcs) in six.iteritems(self.__edgeMap):
660 edge_map[d] = srcs.copy()
661 while nodes:
662 freeset = set()
663 for n in nodes:
664 if not (n in edge_map):
665 freeset.add(n)
666 if 0 == len(freeset):
667 _log.error('dependency cycle in named components')
668 return None
669 order.append(freeset)
670 nodes.difference_update(freeset)
671 new_edge_map = {}
672 for (d, srcs) in six.iteritems(edge_map):
673 srcs.difference_update(freeset)
674 if 0 != len(srcs):
675 new_edge_map[d] = srcs
676 edge_map = new_edge_map
677 return order
678
679 LocationPrefixRewriteMap_ = { }
686
688 """Normalize a URI against an optional parent_uri in the way that is
689 done for C{schemaLocation} attribute values.
690
691 If no URI scheme is present, this will normalize a file system
692 path.
693
694 Optionally, the resulting absolute URI can subsequently be
695 rewritten to replace specified prefix strings with alternative
696 strings, e.g. to convert a remote URI to a local repository. This
697 rewriting is done after the conversion to an absolute URI, but
698 before normalizing file system URIs.
699
700 @param uri : The URI to normalize. If C{None}, function returns
701 C{None}
702
703 @param parent_uri : The base URI against which normalization is
704 done, if C{uri} is a relative URI.
705
706 @param prefix_map : A map used to rewrite URI prefixes. If
707 C{None}, the value defaults to that stored by
708 L{SetLocationPrefixRewriteMap}.
709
710 """
711 if uri is None:
712 return uri
713 if parent_uri is None:
714 abs_uri = uri
715 else:
716 abs_uri = urlparse.urljoin(parent_uri, uri)
717 if prefix_map is None:
718 prefix_map = LocationPrefixRewriteMap_
719 for (pfx, sub) in six.iteritems(prefix_map):
720 if abs_uri.startswith(pfx):
721 abs_uri = sub + abs_uri[len(pfx):]
722 if 0 > abs_uri.find(':'):
723 abs_uri = os.path.realpath(abs_uri)
724 return abs_uri
725
728 """Retrieve the contents of the uri as raw data.
729
730 If the uri does not include a scheme (e.g., C{http:}), it is
731 assumed to be a file path on the local system."""
732
733 from pyxb.utils.six.moves.urllib.request import urlopen
734 stream = None
735 exc = None
736
737
738
739 if 0 <= uri.find(':'):
740 try:
741 stream = urlopen(uri)
742 except Exception as e:
743 exc = e
744 if (stream is None) and six.PY2:
745 import urllib
746 try:
747 stream = urllib.urlopen(uri)
748 exc = None
749 except:
750
751 pass
752 if stream is None:
753
754 try:
755 stream = open(uri, 'rb')
756 exc = None
757 except Exception as e:
758 if exc is None:
759 exc = e
760 if exc is not None:
761 _log.error('open %s', uri, exc_info=exc)
762 raise exc
763 try:
764
765
766 if isinstance(stream, six.moves.file) or isinstance(stream.fp, six.moves.file):
767 archive_directory = None
768 except:
769 pass
770 xmld = stream.read()
771 if archive_directory:
772 base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
773 counter = 1
774 dest_file = os.path.join(archive_directory, base_name)
775 while os.path.isfile(dest_file):
776 dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
777 counter += 1
778 try:
779 OpenOrCreate(dest_file).write(xmld)
780 except OSError as e:
781 _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
782 return xmld
783
def OpenOrCreate(file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories in the path to the file are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path of the file to open or create.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8) causes an C{OSError} to be raised with
    C{errno} set to C{EEXIST}.  I.e., only files with this value in
    the first 4KB will be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: with C{errno} C{EEXIST} when the C{tag} check
    fails; the file is left untouched in that case.
    """
    directory = os.path.dirname(file_name)
    if directory:
        try:
            os.makedirs(directory)
        except (OSError, IOError) as e:
            # An already-existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    # Append mode creates the file if needed without clobbering it, which
    # lets the tag be checked before deciding whether to erase.
    fp = open(file_name, 'ab+')
    if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
        fp.seek(0)
        # 'replace' keeps a multi-byte character split at the 4096-byte
        # boundary (or stray non-UTF-8 data) from aborting the check.
        blockt = fp.read(4096).decode('utf-8', 'replace')
        if 0 > blockt.find(tag):
            # Do not leak the handle of a file we refuse to hand out.
            fp.close()
            raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
    if preserve_contents:
        # Offset 0 relative to the end, not absolute offset 2.
        fp.seek(0, os.SEEK_END)
    else:
        fp.seek(0)
        fp.truncate()
    return fp
826
827
828 __Hasher = None
829 try:
830 import hashlib
831 __Hasher = hashlib.sha1
832 except ImportError:
833 import sha
834 __Hasher = sha.new
835
def HashForText(text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    The hash function is the module-level C{__Hasher} (SHA-1 from
    C{hashlib} when that module is available).

    @param text: the text (Unicode or encoded bytes) to be hashed.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    if isinstance(text, six.text_type):
        # Hashers operate on bytes, so encode text first.
        text = text.encode('utf-8')
    return __Hasher(text).hexdigest()
849
850
851 __HaveUUID = False
852 try:
853 import uuid
854 __HaveUUID = True
855 except ImportError:
856 import random
858 """Obtain a UUID using the best available method. On a version of
859 python that does not incorporate the C{uuid} class, this creates a
860 string combining the current date and time (to the second) with a
861 random number.
862
863 @rtype: C{str}
864 """
865 if __HaveUUID:
866 return uuid.uuid1().urn
867 return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))
868
870 """Records a unique identifier, generally associated with a
871 binding generation action.
872
873 The identifier is a string, but gets wrapped in an instance of
874 this class to optimize comparisons and reduce memory footprint.
875
876 Invoking the constructor for this class on the same string
877 multiple times will return the same Python object.
878
879 An instance of this class compares equal to, and hashes equivalent
880 to, the uid string. When C{str}'d, the result is the uid; when
881 C{repr}'d, the result is a constructor call to
882 C{pyxb.utils.utility.UniqueIdentifier}.
883 """
884
885
886 __ExistingUIDs = {}
887
889 """The string unique identifier"""
890 return self.__uid
891 __uid = None
892
893
896
899
902
903
919
921 """Associate the given object with this identifier.
922
923 This is a one-way association: the object is not provided with
924 a return path to this identifier instance."""
925 self.__associatedObjects.add(obj)
927 """The set of objects that have been associated with this
928 identifier instance."""
929 return self.__associatedObjects
930 __associatedObjects = None
931
933 """Create a new UniqueIdentifier instance.
934
935 @param uid: The unique identifier string. If present, it is
936 the caller's responsibility to ensure the value is universally
937 unique. If C{None}, one will be provided.
938 @type uid: C{str} or C{unicode}
939 """
940 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
941 self.__associatedObjects = set()
942
944 if other is None:
945 return False
946 elif isinstance(other, UniqueIdentifier):
947 other_uid = other.uid()
948 elif isinstance(other, six.string_types):
949 other_uid = other
950 else:
951 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
952 return self.uid() == other_uid
953
955 return hash(self.uid())
956
959
961 return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)
962
965 """A C{datetime.tzinfo} subclass that helps deal with UTC
966 conversions in an ISO8601 world.
967
968 This class only supports fixed offsets from UTC.
969 """
970
971
972 __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$')
973
974
975 __utcOffset_min = 0
976
977
978 __utcOffset_td = None
979
980
981 __ZeroDuration = datetime.timedelta(0)
982
983
984 __MaxOffset_td = datetime.timedelta(hours=14)
985
987 """Create a time zone instance with a fixed offset from UTC.
988
989 @param spec: Specifies the offset. Can be an integer counting
990 minutes east of UTC, the value C{None} (equal to 0 minutes
991 east), or a string that conform to the ISO8601 time zone
992 sequence (B{Z}, or B{[+-]HH:MM}).
993 """
994
995 if spec is not None:
996 if isinstance(spec, six.string_types):
997 if 'Z' == spec:
998 self.__utcOffset_min = 0
999 else:
1000 match = self.__Lexical_re.match(spec)
1001 if match is None:
1002 raise ValueError('Bad time zone: %s' % (spec,))
1003 self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
1004 if '-' == match.group(1):
1005 self.__utcOffset_min = - self.__utcOffset_min
1006 elif isinstance(spec, int):
1007 self.__utcOffset_min = spec
1008 elif isinstance(spec, datetime.timedelta):
1009 self.__utcOffset_min = spec.seconds // 60
1010 else:
1011 raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
1012 self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
1013 if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
1014 raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
1015 if 0 == self.__utcOffset_min:
1016 self.__tzName = 'Z'
1017 elif 0 > self.__utcOffset_min:
1018 self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
1019 else:
1020 self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)
1021
1023 """Returns the constant offset for this zone."""
1024 return self.__utcOffset_td
1025
1027 """Return the name of the timezone in the format expected by XML Schema."""
1028 return self.__tzName
1029
def dst(self, dt):
    """Returns a constant zero duration.

    @param dt: the datetime being queried; it is not consulted.
    """
    return self.__ZeroDuration
1033
1038
1041
1044
1047
1049 """A C{datetime.tzinfo} subclass for the local time zone.
1050
1051 Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
1052 """
1053
1054 __STDOffset = datetime.timedelta(seconds=-time.timezone)
1055 __DSTOffset = __STDOffset
1056 if time.daylight:
1057 __DSTOffset = datetime.timedelta(seconds=-time.altzone)
1058 __ZeroDelta = datetime.timedelta(0)
1059 __DSTDelta = __DSTOffset - __STDOffset
1060
1065
1066 - def dst (self, dt):
1070
1073
1075 tt = (dt.year, dt.month, dt.day,
1076 dt.hour, dt.minute, dt.second,
1077 0, 0, -1)
1078 tt = time.localtime(time.mktime(tt))
1079 return tt.tm_isdst > 0
1080
1082 """Emulate the B{transient} keyword from Java for private member
1083 variables.
1084
1085 This class defines a C{__getstate__} method which returns a copy
1086 of C{self.__dict__} with certain members removed. Specifically,
1087 if a string "s" appears in a class member variable named
1088 C{__PrivateTransient} defined in the "Class" class, then the
1089 corresponding private variable "_Class__s" will be removed from
1090 the state dictionary. This is used to eliminate unnecessary
1091 fields from instances placed in L{namespace
1092 archives<pyxb.namespace.archive.NamespaceArchive>} without having
1093 to implement a C{__getstate__} method in every class in the
1094 instance hierarchy.
1095
1096 For an example, see
1097 L{pyxb.xmlschema.structures._SchemaComponent_mixin}
1098
1099 If you use this, it is your responsibility to define the
1100 C{__PrivateTransient} class variable and add to it the required
1101 variable names.
1102
1103 Classes that inherit from this are free to define their own
1104 C{__getstate__} method, which may or may not invoke the superclass
1105 one. If you do this, be sure that the class defining
1106 C{__getstate__} lists L{PrivateTransient_mixin} as one of its
1107 direct superclasses, lest the latter end up earlier in the mro and
1108 consequently bypass the local override.
1109 """
1110
1111
1112
1113 __Attribute = '__PrivateTransient'
1114
1116 state = self.__dict__.copy()
1117
1118
1119 attr = '_%s%s_' % (self.__class__.__name__, self.__Attribute)
1120 skipped = getattr(self.__class__, attr, None)
1121 if skipped is None:
1122 skipped = set()
1123 for cl in self.__class__.mro():
1124 for (k, v) in six.iteritems(cl.__dict__):
1125 if k.endswith(self.__Attribute):
1126 cl2 = k[:-len(self.__Attribute)]
1127 skipped.update([ '%s__%s' % (cl2, _n) for _n in v ])
1128 setattr(self.__class__, attr, skipped)
1129 for k in skipped:
1130 if state.get(k) is not None:
1131 del state[k]
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142 return state
1143
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile(r'.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    The returned paths are built by joining each path entry with the
    names found beneath it; they are absolute only when the
    corresponding path entry is absolute.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a regular file is itself a
    candidate for the result.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.  For a
    path entry that names a file the pattern is searched against the
    whole entry; for files found in a directory it is searched against
    the file name only.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.  It is substituted at most once; later occurrences of
    the wildcard are dropped.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: A list of the matching file paths, in the order in which
    the path entries were processed.
    """
    matching_files = []
    path_set = path.split(os.pathsep)
    while path_set:
        path = path_set.pop(0)
        if default_path_wildcard == path:
            if default_path is not None:
                path_set[0:0] = default_path.split(os.pathsep)
            # Forget the default once used, so a default path that
            # itself contains the wildcard cannot expand forever.
            default_path = None
            continue
        if (prefix_pattern is not None) and path.startswith(prefix_pattern):
            path = os.path.join(prefix_substituend, path[len(prefix_pattern):])
        # A trailing '//' requests a recursive scan and is not part of
        # the directory name.
        recursive = path.endswith('//')
        if recursive:
            path = path[:-2]
        if os.path.isfile(path):
            if (pattern is None) or (pattern.search(path) is not None):
                matching_files.append(path)
            continue
        for (root, _dirs, files) in os.walk(path):
            matching_files.extend(os.path.join(root, f) for f in files
                                  if (pattern is None) or (pattern.search(f) is not None))
            if not recursive:
                # Only the top-level directory was requested.
                break
    return matching_files
1216
1217 @BackfillComparisons
1218 -class Location (object):
1219 __locationBase = None
1220 __lineNumber = None
1221 __columnNumber = None
1222
def __init__ (self, location_base=None, line_number=None, column_number=None):
    """Record the base, line, and column that identify a location.

    @keyword location_base: Value identifying what the location is
    relative to.  A C{str} value is interned before it is stored; any
    other value is stored as given.

    @keyword line_number: Line number to associate with the location.

    @keyword column_number: Column number to associate with the location."""
    base = location_base
    if isinstance(base, str):
        base = six.moves.intern(base)
    (self.__locationBase, self.__lineNumber, self.__columnNumber) = (base, line_number, column_number)
1229
def newLocation (self, locator=None, line_number=None, column_number=None):
    """Create a L{Location} that shares this instance's location base.

    @keyword locator: Optional object queried through C{getLineNumber()}
    and C{getColumnNumber()}.  When provided, the values it returns
    replace C{line_number} and C{column_number}.

    @keyword line_number: Line number used when no locator supplies one.

    @keyword column_number: Column number used when no locator supplies
    one.

    @return: A new L{Location} instance."""
    if locator is not None:
        try:
            line_number = locator.getLineNumber()
            column_number = locator.getColumnNumber()
        except Exception:
            # Best effort: a locator that cannot report its position
            # leaves whatever values have been determined so far.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate to the caller.
            pass
    return Location(self.__locationBase, line_number, column_number)
1238
1239 locationBase = property(lambda _s: _s.__locationBase)
1240 lineNumber = property(lambda _s: _s.__lineNumber)
1241 columnNumber = property(lambda _s: _s.__columnNumber)
1242
1244 if v1 is None:
1245 if v2 is None:
1246 return None
1247 return 1
1248 if v2 is None:
1249 return -1
1250 if v1 < v2:
1251 return -1
1252 if v1 == v2:
1253 return 0
1254 return 1
1255
1263
1266
1268 """Comparison by locationBase, then lineNumber, then columnNumber."""
1269 if other is None:
1270 return False
1271 rv = self.__cmpTupleUnlessNone(self, other)
1272 if rv is None:
1273 return True
1274 return 0 == rv
1275
1277 if other is None:
1278 return False
1279 rv = self.__cmpTupleUnlessNone(self, other)
1280 if rv is None:
1281 return False
1282 return -1 == rv
1283
1292
1297
1310
1312 """Filtered built-in repr for python 2/3 compatibility in
1313 generated bindings.
1314
1315 All generated string values are to be unicode. We always import
1316 unicode_literals from __future__, so we want plain quotes with no
1317 prefix u. Strip that off.
1318
1319 Integer constants should not have the suffix L even if they do not
1320 fit in a Python2 int. The references generated through this
1321 function are never used for calculations, so the implicit cast to
1322 a larger type is sufficient.
1323
1324 All other values use their standard representations.
1325 """
1326 if isinstance(v, six.string_types):
1327 qu = QuotedEscaped(v)
1328 if 'u' == qu[0]:
1329 return qu[1:]
1330 return qu
1331 if isinstance(v, six.integer_types):
1332 vs = repr(v)
1333 if vs.endswith('L'):
1334 return vs[:-1]
1335 return vs
1336 return repr(v)
1337