1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module provides Finite Automata with Counters.
17
18 FACs are type of state machine where a transition may include a
19 constraint and a modification to a set of counters. They are used to
20 implement regular expressions with numerical constraints, as are found
21 in POSIX regexp, Perl, and XML schema.
22
23 The implementation here derives from U{Regular Expressions with
24 Numerical Constraints and Automata with Counters
25 <https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>},
26 Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684,
27 Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245. In what
28 follows, this reference will be denoted B{HOV09}.
29
30 A regular expression is directly translated into a term tree, where
31 nodes are operators such as sequence, choice, and counter
32 restrictions, and the leaf nodes denote symbols in the language of the
33 regular expression.
34
35 In the case of XML content models, the symbols include L{element
36 declarations <pyxb.xmlschema.structures.ElementDeclaration>} and
37 L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}. A
38 numerical constraint node corresponds to an L{XML particle
39 <pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes
40 derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of
41 types B{choice} and B{sequence}. As suggested in U{The Membership
42 Problem for Regular Expressions with Unordered Concatenation and
43 Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>}
44 the B{all} content model can be translated into state machine using
45 choice and sequence at the cost of a quadratic size explosion. Since
46 some XML content models might have a hundred terms in an unordered
47 catenation, this is not acceptable, and the implementation here
48 optimizes this construct by creating a leaf node in the automaton
49 which in turn contains sub-automata for each term, and permits an exit
50 transition only when all the terms that are required have been
51 completed.
52
53 @note: In XSD 1.1 the restriction that terms in an B{all} model group
54 occur at most once has been removed. Since the current implementation
55 removes a completed term from the set of available terms, this will
56 not work: instead the subconfiguration with its counter values must be
57 retained between matches.
58 """
59
60 import operator
61 import functools
62 import logging
63 from pyxb.utils import six
64 from pyxb.utils.six.moves import xrange
65
66 log_ = logging.getLogger(__name__)
70
72 """Exception raised when a FAC term tree is not a tree.
73
74 For example, a L{Symbol} node appears multiple times, or a cycle is detected."""
75
76 parent = None
77 """The L{MultiTermNode} containing the term that proves invalidity"""
78
79 term = None
80 """The L{Node} that proves invalidity"""
81
85
87 """Exception raised when an unsatisfied update instruction is executed.
88
89 This indicates an internal error in the implementation."""
90
91 update_instruction = None
92 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}"""
93
94 values = None
95 """The unsatisfying value map from L{CounterCondition} instances to integers"""
96
100
102 """Symbol rejected by L{Configuration_ABC.step}.
103
104 The exception indicates that the proposed symbol either failed to
105 produce a transition (L{UnrecognizedSymbolError}) or produced
106 multiple equally valid transitions
107 (L{NondeterministicSymbolError})."""
108
109 configuration = None
110 """The instance of L{Configuration_ABC} that raised the exception.
111 From L{Configuration_ABC.acceptableSymbols} you can determine what
112 alternatives might be present."""
113
114 symbol = None
115 """The symbol that was not accepted."""
116
120 acceptable = property(__get_acceptable)
121
125
127 """L{Configuration.step} failed to find a valid transition."""
128 pass
129
131 """L{Configuration.step} found multiple transitions."""
132 pass
133
135 """Mix-in used by symbols to provide a custom match implementation.
136
137 If a L{State.symbol} value is an instance of this mix-in, then it
138 will be used to validate a candidate symbol for a match."""
139
140 - def match (self, symbol):
141 raise NotImplementedError('%s.match' % (type(self).__name__,))
142
144 """A thin wrapper around an object reference.
145
146 The state of the automaton corresponds to a position, or marked
147 symbol, in the term tree. Because the same symbol may appear at
148 multiple locations in the tree, and the distinction between these
149 positions is critical, a L{State} wrapper is provided to maintain
150 distinct values."""
151
152 - def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
153 """Create a FAC state.
154
155 @param symbol: The symbol associated with the state.
156 Normally initialized from the L{Symbol.metadata} value. The
157 state may be entered if, among other conditions, the L{match}
158 routine accepts the proposed input as being consistent with
159 this value.
160
161 @param is_initial: C{True} iff this state may serve as the
162 first state of the automaton.
163
164 @param final_update: C{None} if this state is not an
165 accepting state of the automaton; otherwise a set of
166 L{UpdateInstruction} values that must be satisfied by the
167 counter values in a configuration as a further restriction of
168 acceptance.
169
170 @param is_unordered_catenation: C{True} if this state has
171 subautomata that must be matched to execute the unordered
172 catenation of an L{All} node; C{False} if this is a regular
173 symbol."""
174 self.__symbol = symbol
175 self.__isInitial = not not is_initial
176 self.__finalUpdate = final_update
177 self.__isUnorderedCatenation = is_unordered_catenation
178
179 __automaton = None
181 """Link to the L{Automaton} to which the state belongs."""
182 return self.__automaton
184 """Method invoked during automaton construction to set state owner."""
185 assert self.__automaton is None
186 self.__automaton = automaton
187 return self
188 automaton = property(__get_automaton)
189
190 __symbol = None
192 """Application-specific metadata identifying the symbol.
193
194 See also L{match}."""
195 return self.__symbol
196 symbol = property(__get_symbol)
197
198 __isUnorderedCatenation = None
200 """Indicate whether the state has subautomata for unordered
201 catenation.
202
203 To reduce state explosion due to non-determinism, such a state
204 executes internal transitions in subautomata until all terms
205 have matched or a failure is discovered."""
206 return self.__isUnorderedCatenation
207 isUnorderedCatenation = property(__get_isUnorderedCatenation)
208
209 __subAutomata = None
211 """A sequence of sub-automata supporting internal state transitions.
212
213 This will return C{None} unless L{isUnorderedCatenation} is C{True}."""
214 return self.__subAutomata
219 subAutomata = property(__get_subAutomata)
220
221 __isInitial = None
223 """C{True} iff this state may be the first state the automaton enters."""
224 return self.__isInitial
225 isInitial = property(__get_isInitial)
226
227 __automatonEntryTransitions = None
229 """Return the set of initial transitions allowing entry to the automata through this state.
230
231 These are structurally-permitted transitions only, and must be
232 filtered based on the symbol that might trigger the
233 transition. The results are not filtered based on counter
234 value, since this value is used to determine how the
235 containing automaton might be entered. Consequently the
236 return value is the empty set unless this is an initial state.
237
238 The returned set is closed under entry to sub-automata,
239 i.e. it is guaranteed that each transition includes a
240 consuming state even if it requires a multi-element chain of
241 transitions into subautomata to reach one."""
242 if self.__automatonEntryTransitions is None:
243 transitions = []
244 if self.__isInitial:
245 xit = Transition(self, set())
246 if self.__subAutomata is None:
247 transitions.append(xit)
248 else:
249 for sa in self.__subAutomata:
250 for saxit in sa.initialTransitions:
251 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition()))
252 self.__automatonEntryTransitions = transitions
253 return self.__automatonEntryTransitions
254 automatonEntryTransitions = property(__get_automatonEntryTransitions)
255
256 __finalUpdate = None
258 """Return the update instructions that must be satisfied for this to be a final state."""
259 return self.__finalUpdate
260 finalUpdate = property(__get_finalUpdate)
261
263 """Return the set of candidate transitions to enter a sub-automaton of this state.
264
265 @param sub_automata: A subset of the sub-automata of this
266 state which should contribute to the result. If C{None}, all
267 sub-automata are used.
268
269 @return: A pair C{(nullable, transitions)} where C{nullable}
270 is C{True} iff there is at least one sub-automaton that is in
271 an accepting state on entry, and C{transitions} is a list of
272 L{Transition} instances describing how to reach some state in
273 a sub-automaton via a consumed symbol.
274 """
275 assert self.__subAutomata is not None
276 is_nullable = True
277 transitions = []
278 if sub_automata is None:
279 sub_automata = self.__subAutomata
280 for sa in sub_automata:
281 if not sa.nullable:
282 is_nullable = False
283 transitions.extend(sa.initialTransitions)
284 return (is_nullable, transitions)
285
287 """C{True} iff this state is an accepting state for the automaton.
288
289 @param counter_values: Counter values that further validate
290 whether the requirements of the automaton have been met.
291
292 @return: C{True} if this is an accepting state and the
293 counter values relevant at it are satisfied."""
294 if self.__finalUpdate is None:
295 return False
296 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
297
298 __transitionSet = None
300 """Definitions of viable transitions from this state.
301
302 The transition set of a state is a set of L{Transition} nodes
303 identifying a state reachable in a single step from this
304 state, and a set of counter updates that must apply if the
305 transition is taken.
306
307 These transitions may not in themselves consume a symbol. For
308 example, if the destination state represents a match of an
309 L{unordered catenation of terms<All>}, then secondary
310 processing must be done to traverse into the automata for
311 those terms and identify transitions that include a symbol
312 consumption.
313
314 @note: Although conceptually the viable transitions are a set,
315 this implementation maintains them in a list so that order is
316 preserved when automata processing becomes non-deterministic.
317 PyXB is careful to build the transition list so that the
318 states are attempted in the order in which they appear in the
319 schema that define the automata.
320 """
321 return self.__transitionSet
322 transitionSet = property(__get_transitionSet)
323
325 """Method invoked during automaton construction to set the
326 legal transitions from the state.
327
328 The set of transitions cannot be defined until all states that
329 appear in it are available, so the creation of the automaton
330 requires that the association of the transition set be
331 delayed. (Though described as a set, the transitions are a
332 list where order reflects priority.)
333
334 @param transition_set: a list of pairs where the first
335 member is the destination L{State} and the second member is the
336 set of L{UpdateInstruction}s that apply when the automaton
337 transitions to the destination state."""
338
339 self.__transitionSet = []
340 seen = set()
341 for xit in transition_set:
342 if not (xit in seen):
343 seen.add(xit)
344 self.__transitionSet.append(xit)
345
346 - def match (self, symbol):
347 """Return C{True} iff the symbol matches for this state.
348
349 This may be overridden by subclasses when matching by
350 equivalence does not work. Alternatively, if the symbol
351 stored in this node is a subclass of L{SymbolMatch_mixin}, then
352 its match method will be used. Otherwise C{symbol} matches
353 only if it is equal to the L{symbol} of this state.
354
355 @param symbol: A candidate symbol corresponding to the
356 expression symbol for this state.
357
358 @return: C{True} iff C{symbol} is a match for this state.
359 """
360 if isinstance(self.__symbol, SymbolMatch_mixin):
361 return self.__symbol.match(symbol)
362 return self.__symbol == symbol
363
365 return 'S.%x' % (id(self),)
366
367 - def _facText (self):
368 rv = []
369 rv.extend(map(str, self.__transitionSet))
370 if self.__finalUpdate is not None:
371 if 0 == len(self.__finalUpdate):
372 rv.append('Final (no conditions)')
373 else:
374 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate))))
375 return '\n'.join(rv)
376
378 """A counter condition is a range limit on valid counter values.
379
380 Instances of this class serve as keys for the counters that
381 represent the configuration of a FAC. The instance also maintains
382 a pointer to application-specific L{metadata}."""
383
384 __min = None
386 """The minimum legal value for the counter.
387
388 This is a non-negative integer."""
389 return self.__min
390 min = property(__get_min)
391
392 __max = None
394 """The maximum legal value for the counter.
395
396 This is a positive integer, or C{None} to indicate that the
397 counter is unbounded."""
398 return self.__max
399 max = property(__get_max)
400
401 __metadata = None
405 metadata = property(__get_metadata)
406
407 - def __init__ (self, min, max, metadata=None):
408 """Create a counter condition.
409
410 @param min: The value for L{min}
411 @param max: The value for L{max}
412 @param metadata: The value for L{metadata}
413 """
414 self.__min = min
415 self.__max = max
416 self.__metadata = metadata
417
420
426
428 return not self.__eq__(other)
429
431 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
432
434 """An update instruction pairs a counter with a mutation of that
435 counter.
436
437 The instruction is executed during a transition from one state to
438 another, and causes the corresponding counter to be incremented or
439 reset. The instruction may only be applied if doing so does not
440 violate the conditions of the counter it affects."""
441
442 __counterCondition = None
444 """A reference to the L{CounterCondition} identifying the
445 counter to be updated.
446
447 The counter condition instance is used as a key to the
448 dictionary maintaining current counter values."""
449 return self.__counterCondition
450 counterCondition = property(__get_counterCondition)
451
452 __doIncrement = None
454 """C{True} if the counter is to be incremented; C{False} if it is to be reset."""
455 return self.__doIncrement
456 doIncrement = property(__get_doIncrement)
457
458
459 __min = None
460 __max = None
461
462 - def __init__ (self, counter_condition, do_increment):
463 """Create an update instruction.
464
465 @param counter_condition: A L{CounterCondition} identifying a
466 minimum and maximum value for a counter, and serving as a map
467 key for the value of the corresponding counter.
468
469 @param do_increment: C{True} if the update is to increment
470 the value of the counter; C{False} if the update is to reset
471 the counter.
472 """
473 self.__counterCondition = counter_condition
474 self.__doIncrement = not not do_increment
475 self.__min = counter_condition.min
476 self.__max = counter_condition.max
477
479 """Implement a component of definition 5 from B{HOV09}.
480
481 The update instruction is satisfied by the counter values if
482 its action may be legitimately applied to the value of its
483 associated counter.
484
485 @param counter_values: A map from L{CounterCondition}s to
486 non-negative integers
487
488 @return: C{True} or C{False}
489 """
490 value = counter_values[self.__counterCondition]
491 if self.__doIncrement \
492 and (self.__max is not None) \
493 and (value >= self.__max):
494 return False
495 if (not self.__doIncrement) \
496 and (value < self.__min):
497 return False
498 return True
499
500 @classmethod
501 - def Satisfies (cls, counter_values, update_instructions):
502 """Return C{True} iff the counter values satisfy the update
503 instructions.
504
505 @param counter_values: A map from L{CounterCondition} to
506 integer counter values
507
508 @param update_instructions: A set of L{UpdateInstruction}
509 instances
510
511 @return: C{True} iff all instructions are satisfied by the
512 values and limits."""
513 for psi in update_instructions:
514 if not psi.satisfiedBy(counter_values):
515 return False
516 return True
517
518 - def apply (self, counter_values):
531
532 @classmethod
533 - def Apply (cls, update_instructions, counter_values):
534 """Apply the update instructions to the counter values.
535
536 @param update_instructions: A set of L{UpdateInstruction}
537 instances.
538
539 @param counter_values: A map from L{CounterCondition}
540 instances to non-negative integers. This map is updated
541 in-place by applying each instruction in
542 C{update_instructions}."""
543 for psi in update_instructions:
544 psi.apply(counter_values)
545
548
553
555 return not self.__eq__(other)
556
559
561 """Representation of a FAC state transition."""
562
563 __destination = None
565 """The transition destination state."""
566 return self.__destination
567 destination = property(__get_destination)
568
569 __updateInstructions = None
571 """The set of counter updates that are applied when the transition is taken."""
572 return self.__updateInstructions
573 updateInstructions = property(__get_updateInstructions)
574
575 __nextTransition = None
577 """The next transition to apply in this chain.
578
579 C{None} if this is the last transition in the chain."""
580 return self.__nextTransition
581 nextTransition = property(__get_nextTransition)
582
583 __layerLink = None
585 """A directive relating to changing automaton layer on transition.
586
587 C{None} indicates this transition is from one state to another
588 within a single automaton.
589
590 An instance of L{Configuration} is a transition on completion
591 of a subautomaton back to the configuration in the parent
592 automaton. The L{destination} is the state in the parent automaton.
593
594 An instance of L{Automaton} requires creation of a
595 sub-configuration and initial entry into the automaton. The
596 L{destination} is the state in the sub-automaton.
597 """
598 return self.__layerLink
599 layerLink = property(__get_layerLink)
600
601 - def __init__ (self, destination, update_instructions, layer_link=None):
602 """Create a transition to a state.
603
604 @param destination: the state into which the transition is
605 made
606
607 @param update_instructions: A iterable of L{UpdateInstruction}s
608 denoting the changes that must be made to counters as a
609 consequence of taking the transition.
610
611 @keyword layer_link: The value for L{layerLink}."""
612 self.__destination = destination
613 if not isinstance(update_instructions, list):
614 update_instructions = list(update_instructions)
615 self.__updateInstructions = update_instructions
616 self.__layerLink = layer_link
617
629
631 """Return the L{symbol<State.symbol>} of the L{consumingState}."""
632 return self.consumingState().symbol
633
664
665 - def apply (self, configuration, clone_map=None):
666 """Apply the transitition to a configuration.
667
668 This updates the configuration counter values based on the
669 update instructions, and sets the new configuration state.
670
671 @note: If the transition involves leaving a sub-automaton or
672 creating a new sub-automaton, the returned configuration
673 structure will be different from the one passed in. You
674 should invoke this as::
675
676 cfg = transition.apply(cfg)
677
678 @param configuration: A L{Configuration} of an executing automaton
679
680 @param clone_map: A map from L{Configuration} to
681 L{Configuration} reflecting the replacements made when the
682 configuration for which the transition was calculated was
683 subsequently cloned into the C{configuration} passed into this
684 method. This is only necessary when the transition includes
685 layer transitions.
686
687 @return: The resulting configuration
688 """
689 layer_link = self.__layerLink
690 if isinstance(layer_link, Configuration):
691 if clone_map is not None:
692 layer_link = clone_map[layer_link]
693 configuration = layer_link.leaveAutomaton(configuration)
694 elif isinstance(layer_link, Automaton):
695 configuration = configuration.enterAutomaton(layer_link)
696 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues())
697 configuration._set_state(self.destination, layer_link is None)
698 if self.__nextTransition is None:
699 return configuration
700 return self.__nextTransition.apply(configuration, clone_map)
701
702 - def chainTo (self, next_transition):
703 """Duplicate the state and chain the duplicate to a successor
704 transition.
705
706 This returns a new transition which applies the operation for
707 this transition, then proceeds to apply the next transition in
708 the chain.
709
710 @note: The node that is invoking this must not have successor
711 transitions.
712
713 @param next_transition: A L{Transition} node describing a
714 subsequent transition.
715
716 @return: a clone of this node, augmented with a link to
717 C{next_transition}."""
718 assert not self.__nextTransition
719 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink)
720 head.__nextTransition = next_transition
721 return head
722
733
739
746
748 return not self.__eq__(other)
749
764
766 """Base class for something that represents an L{Automaton} in
767 execution.
768
769 For deterministic automata, this is generally a L{Configuration}
770 which records the current automaton state along with its counter
771 values.
772
773 For non-deterministic automata, this is a L{MultiConfiguration}
774 which records a set of L{Configuration}s."""
775
777 """Return the acceptable L{Symbol}s given the current
778 configuration.
779
780 This method extracts the symbol from all candidate transitions
781 that are permitted based on the current counter values.
782 Because transitions are presented in a preferred order, the
783 symbols are as well."""
784 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
785
786 - def step (self, symbol):
787 """Execute an automaton transition using the given symbol.
788
789 @param symbol: A symbol from the alphabet of the automaton's
790 language. This is a Python value that should be accepted by
791 the L{SymbolMatch_mixin.match} method of a L{State.symbol}.
792 It is not a L{Symbol} instance.
793
794 @return: The new configuration resulting from the step.
795
796 @raises AutomatonStepError: L{UnrecognizedSymbolError}
797 when no transition compatible with C{symbol} is available, and
798 L{NondeterministicSymbolError} if C{symbol} admits multiple
799 transitions and the subclass does not support
800 non-deterministic steps (see L{MultiConfiguration}).
801
802 @warning: If the step entered or left a sub-automaton the
803 return value will not be the configuration that was used to
804 execute the step. The proper pattern for using this method
805 is::
806
807 cfg = cfg.step(sym)
808
809 """
810 raise NotImplementedError('%s.step' % (type(self).__name__,))
811
813 """The state of an L{Automaton} in execution.
814
815 This combines a state node of the automaton with a set of counter
816 values."""
817
818 __state = None
820 """The state of the configuration.
821
822 This is C{None} to indicate an initial state, or one of the underlying automaton's states."""
823 return self.__state
850 state = property(__get_state)
851
852 __counterValues = None
853 """The values of the counters.
854
855 This is a map from the CounterCondition instances of the
856 underlying automaton to integer values."""
859
860 __automaton = None
863 automaton = property(__get_automaton)
864
865 __subConfiguration = None
867 """Reference to configuration being executed in a sub-automaton.
868
869 C{None} if no sub-automaton is active, else a reference to a
870 configuration that is being executed in a sub-automaton.
871
872 Sub-configurations are used to match sub-terms in an
873 L{unordered catenation<All>} term. A configuration may have
874 at most one sub-configuration at a time, and the configuration
875 will be removed and possibly replaced when the term being
876 processed completes."""
877 return self.__subConfiguration
878 subConfiguration = property(__get_subConfiguration)
879
880 __superConfiguration = None
882 """Reference to the configuration for which this is a
883 sub-configuration.
884
885 C{None} if no super-automaton is active, else a reference to a
886 configuration that is being executed in a super-automaton.
887
888 The super-configuration relation persists for the lifetime of
889 the configuration."""
890 return self.__superConfiguration
891 superConfiguration = property(__get_superConfiguration)
892
893 __subAutomata = None
895 """A set of automata that must be satisfied before the current state can complete.
896
897 This is used in unordered catenation. Each sub-automaton
898 represents a term in the catenation. When the configuration
899 enters a state with sub-automata, a set containing references
900 to those automata is assigned to this attribute.
901 Subsequently, until all automata in the state are satisfied,
902 transitions can only occur within an active sub-automaton, out
903 of the active sub-automaton if it is in an accepting state,
904 and into a new sub-automaton if no sub-automaton is active.
905 """
906 return self.__subAutomata
909 subAutomata = property(__get_subAutomata)
910
912 """Create a transition back to the containing configuration.
913
914 This is done when a configuration is in an accepting state and
915 there are candidate transitions to other states that must be
916 considered. The transition does not consume a symbol."""
917 assert self.__superConfiguration is not None
918 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
919
921 """Execute steps to leave a sub-automaton.
922
923 @param sub_configuration: The configuration associated with
924 the automata that has completed.
925
926 @return: C{self}"""
927 assert sub_configuration.__superConfiguration == self
928 self.__subConfiguration = None
929 return self
930
948
951
958
960 """Return list of viable transitions on C{symbol}
961
962 The transitions that are structurally permitted from this
963 state, in order, filtering out those transitions where the
964 update instruction is not satisfied by the configuration
965 counter values and optionally those for which the symbol does
966 not match.
967
968 @param symbol: A symbol through which a transition from this
969 state is intended. A value of C{None} indicates that the set
970 of transitions should ignore the symbol; candidates are still
971 filtered based on the counter state of the configuration.
972
973 @return: A list of L{Transition} instances permitted from the
974 current configuration. If C{symbol} is not C{None},
975 transitions that would not accept the symbol are excluded.
976 Any transition that would require an unsatisfied counter
977 update is also excluded. Non-deterministic automata may
978 result in a lits with multiple members. """
979
980 fac = self.__automaton
981 transitions = []
982 if symbol is None:
983 match_filter = lambda _xit: True
984 else:
985 match_filter = lambda _xit: _xit.consumingState().match(symbol)
986 update_filter = lambda _xit: _xit.satisfiedBy(self)
987
988 if self.__state is None:
989
990 transitions.extend(fac.initialTransitions)
991 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting():
992
993
994 pass
995 else:
996
997
998 include_local = True
999 if self.__subAutomata:
1000
1001
1002
1003 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata)
1004 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial))
1005 if include_local:
1006
1007 for xit in filter(update_filter, self.__state.transitionSet):
1008 if xit.consumingState() is not None:
1009 transitions.append(xit)
1010 else:
1011
1012
1013
1014
1015 (_, sub_initial) = xit.destination.subAutomataInitialTransitions()
1016 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial))
1017 if (self.__superConfiguration is not None) and self.isAccepting():
1018
1019 lxit = self.makeLeaveAutomatonTransition()
1020 supxit = self.__superConfiguration.candidateTransitions(symbol)
1021 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit))
1022 assert len(frozenset(transitions)) == len(transitions)
1023 return list(filter(update_filter, filter(match_filter, transitions)))
1024
1027
1028 - def step (self, symbol):
1035
1037 """Return C{True} iff no transitions have ever been made."""
1038 return self.__state is None
1039
1054
1055 - def __init__ (self, automaton, super_configuration=None):
1059
1060 - def clone (self, clone_map=None):
1061 """Clone a configuration and its descendents.
1062
1063 This is used for parallel execution where a configuration has
1064 multiple candidate transitions and must follow all of them.
1065 It clones the entire chain of configurations through
1066 multiple layers.
1067
1068 @param clone_map: Optional map into which the translation from
1069 the original configuration object to the corresponding cloned
1070 configuration object can be reconstructed, e.g. when applying
1071 a transition that includes automata exits referencing
1072 superconfigurations from the original configuration.
1073 """
1074 if clone_map is None:
1075 clone_map = {}
1076 root = self
1077 while root.__superConfiguration is not None:
1078 root = root.__superConfiguration
1079 root = root._clone(clone_map, None)
1080 return clone_map.get(self)
1081
1082 - def _clone (self, clone_map, super_configuration):
1094
1097
1099 """Support parallel execution of state machine.
1100
1101 This holds a set of configurations, and executes each transition
1102 on each one. Configurations which fail to accept a step are
1103 silently dropped; only if this results in no remaining
1104 configurations will L{UnrecognizedSymbolError} be raised. If a
1105 step admits multiple valid transitions, a configuration is added
1106 for each one.
1107
1108 See L{pyxb.binding.content.AutomatonConfiguration} for an
1109 alternative solution which holds actions associated with the
1110 transition until the non-determinism is resolved."""
1111
1112 __configurations = None
1113
1116
1122
1123 - def step (self, symbol):
1124 next_configs = []
1125 for cfg in self.__configurations:
1126 transitions = cfg.candidateTransitions(symbol)
1127 if 0 == len(transitions):
1128 pass
1129 elif 1 == len(transitions):
1130 next_configs.append(transitions[0].apply(cfg))
1131 else:
1132 for transition in transitions:
1133 clone_map = {}
1134 ccfg = cfg.clone(clone_map)
1135 next_configs.append(transition.apply(ccfg, clone_map))
1136 if 0 == len(next_configs):
1137 raise UnrecognizedSymbolError(self, symbol)
1138 assert len(frozenset(next_configs)) == len(next_configs)
1139 self.__configurations = next_configs
1140 return self
1141
1143 """Return the set of configurations that are in an accepting state.
1144
1145 Note that some of the configurations may be within a
1146 sub-automaton; their presence in the return value is because
1147 the root configuration is also accepting."""
1148 accepting = []
1149 for cfg in self.__configurations:
1150 rcfg = cfg
1151
1152
1153 while rcfg.superConfiguration is not None:
1154 rcfg = rcfg.superConfiguration
1155 if rcfg.isAccepting():
1156 accepting.append(cfg)
1157 return accepting
1158
1160 """Representation of a Finite Automaton with Counters.
1161
1162 This has all the standard FAC elements, plus links to other
1163 states/automata as required to support the nested automata
1164 construct used for matching unordered catenation terms."""
1165 __states = None
1167 """The set of L{State}s in the automaton.
1168
1169 These correspond essentially to marked symbols in the original
1170 regular expression, or L{element
1171 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in
1172 an XML schema.
1173
1174 @note: These are conceptually a set and are stored that way.
1175 When an L{Automaton} is constructed the incoming states should
1176 be passed as a list so the calculated initial transitions are
1177 executed in a deterministic order."""
1178 return self.__states
1179 states = property(__get_states)
1180
1181 __counterConditions = None
1183 """The set of L{CounterCondition}s in the automaton.
1184
1185 These are marked positions in the regular expression, or
1186 L{particles<pyxb.xmlschema.structures.Particle>} in an XML
1187 schema, paired with their occurrence constraints."""
1188 return self.__counterConditions
1189 counterConditions = property(__get_counterConditions)
1190
1191 __nullable = None
1193 """C{True} iff the automaton accepts the empty string."""
1194 return self.__nullable
1195 nullable = property(__get_nullable)
1196
1197 __initialTransitions = None
1199 """The set of transitions that may be made to enter the automaton.
1200
1201 These are full transitions, including chains into subautomata
1202 if an initial state represents a node with sub-automata.
1203
1204 @note: As with L{State.transitionSet}, the set is represented
1205 as a list to preserve priority when resolving
1206 non-deterministic matches."""
1207 return self.__initialTransitions
1208 initialTransitions = property(__get_initialTransitions)
1209
1210 __containingState = None
1212 """The L{State} instance for which this is a sub-automaton.
1213
1214 C{None} if this is not a sub-automaton."""
1215 return self.__containingState
1216 containingState = property(__get_containingState)
1217
1218 __finalStates = None
1220 """The set of L{State} members which can terminate a match."""
1221 return self.__finalStates
1222 finalStates = property(__get_finalStates)
1223
1224 - def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1242
1244 """Return a new L{Configuration} instance for this automaton."""
1245 return Configuration(self)
1246
1262
1263 -class Node (object):
1264 """Abstract class for any node in the term tree.
1265
1266 In its original form a B{position} (C{pos}) is a tuple of
1267 non-negative integers comprising a path from a node in the term
1268 tree. It identifies a node in the tree. After the FAC has been
1269 constructed, only positions that are leaf nodes in the term tree
1270 remain, and the corresponding symbol value (Python instance) is
1271 used as the position.
1272
1273 An B{update instruction} (C{psi}) is a map from positions to
1274 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions
1275 to be taken on the counter states corresponding to the positions
1276 in its domain.
1277
1278 A B{transition} is a pair containing a position and an update
1279 instruction. It identifies a potential next node in the state and
1280 the updates that are to be performed if the transition is taken.
1281
1282 A B{follow value} is a map from a position to a set of transitions
1283 that may originate from the pos. This set is represented as a
1284 Python list since update instructions are dicts and cannot be
1285 hashed.
1286 """
1287
1288 _Precedence = None
1289 """An integral value used for parenthesizing expressions.
1290
1291 A subterm that has a precedence less than that of its containing
1292 term must be enclosed in parentheses when forming a text
1293 expression representing the containing term."""
1294
1295 RESET = False
1296 """An arbitrary value representing reset of a counter."""
1297
1298 INCREMENT = True
1299 """An arbitrary value representing increment of a counter."""
1300
1302 """Create a FAC term-tree node.
1303
1304 @keyword metadata: Any application-specific metadata retained in
1305 the term tree for transfer to the resulting automaton."""
1306 self.__metadata = kw.get('metadata')
1307
1308 - def clone (self, *args, **kw):
1309 """Create a deep copy of the node.
1310
1311 All term-tree--related attributes and properties are replaced
1312 with deep clones. Other attributes are preserved.
1313
1314 @param args: A tuple of arguments to be passed to the instance
1315 constructor.
1316
1317 @param kw: A dict of keywords to be passed to the instance
1318 constructor.
1319
1320 @note: Subclasses should pre-extend this method to augment the
1321 C{args} and C{kw} parameters as necessary to match the
1322 expectations of the C{__init__} method of the class being
1323 cloned."""
1324 kw.setdefault('metadata', self.metadata)
1325 return type(self)(*args, **kw)
1326
1327 __metadata = None
1331 metadata = property(__get_metadata)
1332
1333 __first = None
1335 """The I{first} set for the node.
1336
1337 This is the set of positions leading to symbols that can
1338 appear first in a string matched by an execution starting at
1339 the node."""
1340 if self.__first is None:
1341 self.__first = frozenset(self._first())
1342 return self.__first
1343 first = property(__get_first)
1344
1346 """Abstract method that defines L{first} for the subclass.
1347
1348 The return value should be an iterable of tuples of integers
1349 denoting paths from this node through the term tree to a
1350 symbol."""
1351 raise NotImplementedError('%s.first' % (type(self).__name__,))
1352
1353 __last = None
1355 """The I{last} set for the node.
1356
1357 This is the set of positions leading to symbols that can
1358 appear last in a string matched by an execution starting at
1359 the node."""
1360 if self.__last is None:
1361 self.__last = frozenset(self._last())
1362 return self.__last
1363 last = property(__get_last)
1364
1366 """Abstract method that defines L{last} for the subclass.
1367
1368 The return value should be an iterable of tuples of integers
1369 denoting paths from this node through the term tree to a
1370 symbol."""
1371 raise NotImplementedError('%s.last' % (type(self).__name__,))
1372
1373 __nullable = None
1379 nullable = property(__get_nullable)
1380
1382 """Abstract method that defines L{nullable} for the subclass.
1383
1384 The return value should be C{True} or C{False}."""
1385 raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1386
1387 __follow = None
1393 follow = property(__get_follow)
1394
1396 """Abstract method that defines L{follow} for the subclass.
1397
1398 The return value should be a map from tuples of integers (positions)
1399 to a list of transitions, where a transition is a position and
1400 an update instruction."""
1401 raise NotImplementedError('%s.follow' % (type(self).__name__,))
1402
1404 """Reset any term-tree state associated with the node.
1405
1406 Any change to the structure of the term tree in which the node
1407 appears invalidates memoized first/follow sets and related
1408 information. This method clears all that data so it can be
1409 recalculated. It does not clear the L{metadata} link, or any
1410 existing structural data."""
1411 self.__first = None
1412 self.__last = None
1413 self.__nullable = None
1414 self.__follow = None
1415 self.__counterPositions = None
1416
1418 """Utility function for term tree processing.
1419
1420 @param pre: a callable that, unless C{None}, is invoked at
1421 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where
1422 C{pos} is the tuple of integers identifying the path from the
1423 node at on which this method was invoked to the node being
1424 processed. The invocation occurs before processing any
1425 subordinate nodes.
1426
1427 @param post: as with C{pre} but invocation occurs after
1428 processing any subordinate nodes.
1429
1430 @param arg: a value passed to invocations of C{pre} and
1431 C{post}."""
1432 self._walkTermTree((), pre, post, arg)
1433
1435 """Abstract method implementing L{walkTermTree} for the subclass."""
1436 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1437
1438 __posNodeMap = None
1440 """A map from positions to nodes in the term tree."""
1441 if self.__posNodeMap is None:
1442 pnm = { }
1443 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm)
1444 self.__posNodeMap = pnm
1445 return self.__posNodeMap
1446 posNodeMap = property(__get_posNodeMap)
1447
1448 __nodePosMap = None
1457 nodePosMap = property(__get_nodePosMap)
1458
1459 @classmethod
1461 """Implement definition 11.1 in B{HOV09}."""
1462 return frozenset([ pos + _mp for _mp in pos_set ])
1463
1464 @classmethod
1466 """Implement definition 11.2 in B{HOV09}"""
1467 rv = {}
1468 for (q, v) in six.iteritems(psi):
1469 rv[pos + q] = v
1470 return rv
1471
1472 @classmethod
1474 """Implement definition 11.3 in B{HOV09}"""
1475 ts = []
1476 for (q, psi) in transition_set:
1477 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) ))
1478 return ts
1479
1485
1487
1488
1489
1490 self.walkTermTree(self.__resetAndValidate, None, set())
1491
1492 counter_map = { }
1493 for pos in self.counterPositions:
1494 nci = self.posNodeMap.get(pos)
1495 assert isinstance(nci, NumericalConstraint)
1496 assert nci not in counter_map
1497 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata)
1498 counters = list(six.itervalues(counter_map))
1499
1500 state_map = { }
1501 for pos in six.iterkeys(self.follow):
1502 sym = self.posNodeMap.get(pos)
1503 assert isinstance(sym, LeafNode)
1504 assert sym not in state_map
1505
1506
1507
1508 is_initial = pos in self.first
1509
1510
1511
1512 final_update = None
1513 if (() == pos and sym.nullable) or (pos in self.last):
1514
1515
1516
1517 final_update = set()
1518 for nci in map(counter_map.get, self.counterSubPositions(pos)):
1519 final_update.add(UpdateInstruction(nci, False))
1520 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All))
1521 if isinstance(sym, All):
1522 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms))
1523 states = list(six.itervalues(state_map))
1524
1525 for (spos, transition_set) in six.iteritems(self.follow):
1526 src = state_map[spos]
1527 phi = []
1528 for (dpos, psi) in transition_set:
1529 dst = state_map[dpos]
1530 uiset = set()
1531 for (counter, action) in six.iteritems(psi):
1532 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action))
1533 phi.append(Transition(dst, uiset))
1534 src._set_transitionSet(phi)
1535
1536 return Automaton(states, counters, self.nullable, containing_state=containing_state)
1537
1538 __counterPositions = None
1540 """Implement definition 13.1 from B{HOV09}.
1541
1542 The return value is the set of all positions leading to
1543 L{NumericalConstraint} nodes for which either the minimum
1544 value is not 1 or the maximum value is not unbounded."""
1545 if self.__counterPositions is None:
1546 cpos = []
1547 self.walkTermTree(lambda _n,_p,_a: \
1548 isinstance(_n, NumericalConstraint) \
1549 and ((1 != _n.min) \
1550 or (_n.max is not None)) \
1551 and _a.append(_p),
1552 None, cpos)
1553 self.__counterPositions = frozenset(cpos)
1554 return self.__counterPositions
1555 counterPositions = property(__get_counterPositions)
1556
1558 """Implement definition 13.2 from B{HOV09}.
1559
1560 This is the subset of L{counterPositions} that occur along the
1561 path to C{pos}."""
1562 rv = set()
1563 for cpos in self.counterPositions:
1564 if cpos == pos[:len(cpos)]:
1565 rv.add(cpos)
1566 return frozenset(rv)
1567
1569 """Obtain a description of the FAC in text format.
1570
1571 This is a diagnostic tool, returning first, last, and follow
1572 maps using positions."""
1573 rv = []
1574 rv.append('r\t= %s' % (str(self),))
1575 states = list(six.iterkeys(self.follow))
1576 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states)))))
1577 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first))))
1578 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last))))
1579 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions))))
1580 for pos in self.first:
1581 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos)))
1582 for spos in states:
1583 for (dpos, transition_set) in self.follow[spos]:
1584 dst = self.posNodeMap[dpos]
1585 uv = []
1586 for (c, u) in six.iteritems(transition_set):
1587 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c)))
1588 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv)))
1589 return '\n'.join(rv)
1590
1592 """Intermediary for nodes that have multiple child nodes."""
1593
1594 __terms = None
1596 """The set of subordinate terms of the current node."""
1597 return self.__terms
1598 terms = property(__get_terms)
1599
1601 """Term that collects an ordered sequence of terms.
1602
1603 The terms are provided as arguments. All must be instances of
1604 a subclass of L{Node}."""
1605 super(MultiTermNode, self).__init__(**kw)
1606 self.__terms = terms
1607
1611
1613 if pre is not None:
1614 pre(self, position, arg)
1615 for c in xrange(len(self.__terms)):
1616 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg)
1617 if post is not None:
1618 post(self, position, arg)
1619
1621 """Intermediary for nodes that have no child nodes."""
1629 return { (): frozenset() }
1630
1632 if pre is not None:
1633 pre(self, position, arg)
1634 if post is not None:
1635 post(self, position, arg)
1636
1638 """A term with a numeric range constraint.
1639
1640 This corresponds to a "particle" in the XML Schema content model."""
1641
1642 _Precedence = -1
1643
1644 __min = None
1647 min = property(__get_min)
1648
1649 __max = None
1652 max = property(__get_max)
1653
1654 __term = None
1657 term = property(__get_term)
1658
1659 - def __init__ (self, term, min=0, max=1, **kw):
1660 """Term with a numerical constraint.
1661
1662 @param term: A term, the number of appearances of which is
1663 constrained in this term.
1664 @type term: L{Node}
1665
1666 @keyword min: The minimum number of occurrences of C{term}.
1667 The value must be non-negative.
1668
1669 @keyword max: The maximum number of occurrences of C{term}.
1670 The value must be positive (in which case it must also be no
1671 smaller than C{min}), or C{None} to indicate an unbounded
1672 number of occurrences."""
1673 super(NumericalConstraint, self).__init__(**kw)
1674 self.__term = term
1675 self.__min = min
1676 self.__max = max
1677
1680
1682 return [ (0,) + _fc for _fc in self.__term.first ]
1683
1685 return [ (0,) + _lc for _lc in self.__term.last ]
1686
1689
1708
1710 if pre is not None:
1711 pre(self, position, arg)
1712 self.__term._walkTermTree(position + (0,), pre, post, arg)
1713 if post is not None:
1714 post(self, position, arg)
1715
1724
1725 -class Choice (MultiTermNode):
1726 """A term that may be any one of a set of terms.
1727
1728 This term matches if any one of its contained terms matches."""
1729
1730 _Precedence = -3
1731
1733 """Term that selects one of a set of terms.
1734
1735 The terms are provided as arguments. All must be instances of
1736 a subclass of L{Node}."""
1737 super(Choice, self).__init__(*terms, **kw)
1738
1744
1750
1752 for t in self.terms:
1753 if t.nullable:
1754 return True
1755 return False
1756
1764
1773
1775 """A term that is an ordered sequence of terms."""
1776
1777 _Precedence = -2
1778
1780 """Term that collects an ordered sequence of terms.
1781
1782 The terms are provided as arguments. All must be instances of
1783 a subclass of L{Node}."""
1784 super(Sequence, self).__init__(*terms, **kw)
1785
1787 rv = set()
1788 c = 0
1789 while c < len(self.terms):
1790 t = self.terms[c]
1791 rv.update([ (c,) + _fc for _fc in t.first])
1792 if not t.nullable:
1793 break
1794 c += 1
1795 return rv
1796
1798 rv = set()
1799 c = len(self.terms) - 1
1800 while 0 <= c:
1801 t = self.terms[c]
1802 rv.update([ (c,) + _lc for _lc in t.last])
1803 if not t.nullable:
1804 break
1805 c -= 1
1806 return rv
1807
1809 for t in self.terms:
1810 if not t.nullable:
1811 return False
1812 return True
1813
1815 rv = {}
1816 for c in xrange(len(self.terms)):
1817 pp = (c,)
1818 for (q, transition_set) in six.iteritems(self.terms[c].follow):
1819 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set)
1820 for c in xrange(len(self.terms)-1):
1821 t = self.terms[c]
1822 pp = (c,)
1823
1824
1825
1826 for q in t.last:
1827 psi = {}
1828 for p1 in t.counterSubPositions(q):
1829 psi[pp + p1] = self.RESET
1830 nc = c
1831 while nc+1 < len(self.terms):
1832 nc += 1
1833 nt = self.terms[nc]
1834 for sq1 in nt.first:
1835 q1 = (nc,) + sq1
1836 rv[pp+q].append((q1, psi))
1837 if not nt.nullable:
1838 break
1839 return rv
1840
1849
1850 -class All (MultiTermNode, LeafNode):
1851 """A term that is an unordered sequence of terms.
1852
1853 Note that the inheritance structure for this node is unusual. It
1854 has multiple children when it is treated as a term tree, but is
1855 considered a leaf node when constructing an automaton.
1856 """
1857
1858 _Precedence = 0
1859
1861 """Term that collects an unordered sequence of terms.
1862
1863 The terms are provided as arguments. All must be instances of
1864 a subclass of L{Node}."""
1865 super(All, self).__init__(*terms, **kw)
1866
1868 for t in self.terms:
1869 if not t.nullable:
1870 return False
1871 return True
1872
1873 @classmethod
1875 """Create a term tree that implements unordered catenation of
1876 the terms.
1877
1878 This expansion results in a standard choice/sequence term
1879 tree, at the cost of quadratic state expansion because terms
1880 are L{cloned<Node.clone>} as required to satisfy the tree
1881 requirements of the term tree.
1882
1883 @param terms: The tuple of terms that are elements of an
1884 accepted sequence.
1885
1886 @return: A term tree comprising a choice between sequences
1887 that connect each term to the unordered catenation of the
1888 remaining terms."""
1889 if 1 == len(terms):
1890 return terms[0]
1891 disjuncts = []
1892 for i in xrange(len(terms)):
1893 n = terms[i]
1894 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:])
1895 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem)))
1896 return Choice(*disjuncts)
1897
1899 return six.u('&(') + six.u(',').join([str(_t) for _t in self.terms]) + ')'
1900
1902 """A leaf term that is a symbol.
1903
1904 The symbol is represented by the L{metadata} field."""
1905
1906 _Precedence = 0
1907
1911
1914
1917