1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module provides Finite Automata with Counters.
17
18 FACs are type of state machine where a transition may include a
19 constraint and a modification to a set of counters. They are used to
20 implement regular expressions with numerical constraints, as are found
21 in POSIX regexp, Perl, and XML schema.
22
23 The implementation here derives from U{Regular Expressions with
24 Numerical Constraints and Automata with Counters
25 <https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>},
26 Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684,
27 Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245. In what
28 follows, this reference will be denoted B{HOV09}.
29
30 A regular expression is directly translated into a term tree, where
31 nodes are operators such as sequence, choice, and counter
32 restrictions, and the leaf nodes denote symbols in the language of the
33 regular expression.
34
35 In the case of XML content models, the symbols include L{element
36 declarations <pyxb.xmlschema.structures.ElementDeclaration>} and
37 L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}. A
38 numerical constraint node corresponds to an L{XML particle
39 <pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes
40 derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of
41 types B{choice} and B{sequence}. As suggested in U{The Membership
42 Problem for Regular Expressions with Unordered Concatenation and
43 Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>}
44 the B{all} content model can be translated into state machine using
45 choice and sequence at the cost of a quadratic size explosion. Since
46 some XML content models might have a hundred terms in an unordered
47 catenation, this is not acceptable, and the implementation here
48 optimizes this construct by creating a leaf node in the automaton
49 which in turn contains sub-automata for each term, and permits an exit
50 transition only when all the terms that are required have been
51 completed.
52
53 @note: In XSD 1.1 the restriction that terms in an B{all} model group
54 occur at most once has been removed. Since the current implementation
55 removes a completed term from the set of available terms, this will
56 not work: instead the subconfiguration with its counter values must be
57 retained between matches.
58 """
59
60 import operator
61 import functools
62 import logging
63
64 log_ = logging.getLogger(__name__)
68
70 """Exception raised when a FAC term tree is not a tree.
71
72 For example, a L{Symbol} node appears multiple times, or a cycle is detected."""
73
74 parent = None
75 """The L{MultiTermNode} containing the term that proves invalidity"""
76
77 term = None
78 """The L{Node} that proves invalidity"""
79
83
85 """Exception raised when an unsatisfied update instruction is executed.
86
87 This indicates an internal error in the implementation."""
88
89 update_instruction = None
90 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}"""
91
92 values = None
93 """The unsatisfying value map from L{CounterCondition} instances to integers"""
94
98
100 """Symbol rejected by L{Configuration_ABC.step}.
101
102 The exception indicates that the proposed symbol either failed to
103 produce a transition (L{UnrecognizedSymbolError}) or produced
104 multiple equally valid transitions
105 (L{NondeterministicSymbolError})."""
106
107 configuration = None
108 """The instance of L{Configuration_ABC} that raised the exception.
109 From L{Configuration_ABC.acceptableSymbols} you can determine what
110 alternatives might be present."""
111
112 symbol = None
113 """The symbol that was not accepted."""
114
118 acceptable = property(__get_acceptable)
119
123
125 """L{Configuration.step} failed to find a valid transition."""
126 pass
127
129 """L{Configuration.step} found multiple transitions."""
130 pass
131
133 """Mix-in used by symbols to provide a custom match implementation.
134
135 If a L{State.symbol} value is an instance of this mix-in, then it
136 will be used to validate a candidate symbol for a match."""
137
138 - def match (self, symbol):
139 raise NotImplementedError('%s.match' % (type(self).__name__,))
140
142 """A thin wrapper around an object reference.
143
144 The state of the automaton corresponds to a position, or marked
145 symbol, in the term tree. Because the same symbol may appear at
146 multiple locations in the tree, and the distinction between these
147 positions is critical, a L{State} wrapper is provided to maintain
148 distinct values."""
149
150 - def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
151 """Create a FAC state.
152
153 @param symbol: The symbol associated with the state.
154 Normally initialized from the L{Symbol.metadata} value. The
155 state may be entered if, among other conditions, the L{match}
156 routine accepts the proposed input as being consistent with
157 this value.
158
159 @param is_initial: C{True} iff this state may serve as the
160 first state of the automaton.
161
162 @param final_update: C{None} if this state is not an
163 accepting state of the automaton; otherwise a set of
164 L{UpdateInstruction} values that must be satisfied by the
165 counter values in a configuration as a further restriction of
166 acceptance.
167
168 @param is_unordered_catenation: C{True} if this state has
169 subautomata that must be matched to execute the unordered
170 catenation of an L{All} node; C{False} if this is a regular
171 symbol."""
172 self.__symbol = symbol
173 self.__isInitial = not not is_initial
174 self.__finalUpdate = final_update
175 self.__isUnorderedCatenation = is_unordered_catenation
176
177 __automaton = None
179 """Link to the L{Automaton} to which the state belongs."""
180 return self.__automaton
182 """Method invoked during automaton construction to set state owner."""
183 assert self.__automaton is None
184 self.__automaton = automaton
185 return self
186 automaton = property(__get_automaton)
187
188 __symbol = None
190 """Application-specific metadata identifying the symbol.
191
192 See also L{match}."""
193 return self.__symbol
194 symbol = property(__get_symbol)
195
196 __isUnorderedCatenation = None
198 """Indicate whether the state has subautomata for unordered
199 catenation.
200
201 To reduce state explosion due to non-determinism, such a state
202 executes internal transitions in subautomata until all terms
203 have matched or a failure is discovered."""
204 return self.__isUnorderedCatenation
205 isUnorderedCatenation = property(__get_isUnorderedCatenation)
206
207 __subAutomata = None
209 """A sequence of sub-automata supporting internal state transitions.
210
211 This will return C{None} unless L{isUnorderedCatenation} is C{True}."""
212 return self.__subAutomata
217 subAutomata = property(__get_subAutomata)
218
219 __isInitial = None
221 """C{True} iff this state may be the first state the automaton enters."""
222 return self.__isInitial
223 isInitial = property(__get_isInitial)
224
225 __automatonEntryTransitions = None
227 """Return the set of initial transitions allowing entry to the automata through this state.
228
229 These are structurally-permitted transitions only, and must be
230 filtered based on the symbol that might trigger the
231 transition. The results are not filtered based on counter
232 value, since this value is used to determine how the
233 containing automaton might be entered. Consequently the
234 return value is the empty set unless this is an initial state.
235
236 The returned set is closed under entry to sub-automata,
237 i.e. it is guaranteed that each transition includes a
238 consuming state even if it requires a multi-element chain of
239 transitions into subautomata to reach one."""
240 if self.__automatonEntryTransitions is None:
241 transitions = []
242 if self.__isInitial:
243 xit = Transition(self, set())
244 if self.__subAutomata is None:
245 transitions.append(xit)
246 else:
247 for sa in self.__subAutomata:
248 for saxit in sa.initialTransitions:
249 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition()))
250 self.__automatonEntryTransitions = transitions
251 return self.__automatonEntryTransitions
252 automatonEntryTransitions = property(__get_automatonEntryTransitions)
253
254 __finalUpdate = None
256 """Return the update instructions that must be satisfied for this to be a final state."""
257 return self.__finalUpdate
258 finalUpdate = property(__get_finalUpdate)
259
261 """Return the set of candidate transitions to enter a sub-automaton of this state.
262
263 @param sub_automata: A subset of the sub-automata of this
264 state which should contribute to the result. If C{None}, all
265 sub-automata are used.
266
267 @return: A pair C{(nullable, transitions)} where C{nullable}
268 is C{True} iff there is at least one sub-automaton that is in
269 an accepting state on entry, and C{transitions} is a list of
270 L{Transition} instances describing how to reach some state in
271 a sub-automaton via a consumed symbol.
272 """
273 assert self.__subAutomata is not None
274 is_nullable = True
275 transitions = []
276 if sub_automata is None:
277 sub_automata = self.__subAutomata
278 for sa in sub_automata:
279 if not sa.nullable:
280 is_nullable = False
281 transitions.extend(sa.initialTransitions)
282 return (is_nullable, transitions)
283
285 """C{True} iff this state is an accepting state for the automaton.
286
287 @param counter_values: Counter values that further validate
288 whether the requirements of the automaton have been met.
289
290 @return: C{True} if this is an accepting state and the
291 counter values relevant at it are satisfied."""
292 if self.__finalUpdate is None:
293 return False
294 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
295
296 __transitionSet = None
298 """Definitions of viable transitions from this state.
299
300 The transition set of a state is a set of L{Transition} nodes
301 identifying a state reachable in a single step from this
302 state, and a set of counter updates that must apply if the
303 transition is taken.
304
305 These transitions may not in themselves consume a symbol. For
306 example, if the destination state represents a match of an
307 L{unordered catenation of terms<All>}, then secondary
308 processing must be done to traverse into the automata for
309 those terms and identify transitions that include a symbol
310 consumption.
311
312 @note: Although conceptually the viable transitions are a set,
313 this implementation maintains them in a list so that order is
314 preserved when automata processing becomes non-deterministic.
315 PyXB is careful to build the transition list so that the
316 states are attempted in the order in which they appear in the
317 schema that define the automata.
318 """
319 return self.__transitionSet
320 transitionSet = property(__get_transitionSet)
321
323 """Method invoked during automaton construction to set the
324 legal transitions from the state.
325
326 The set of transitions cannot be defined until all states that
327 appear in it are available, so the creation of the automaton
328 requires that the association of the transition set be
329 delayed. (Though described as a set, the transitions are a
330 list where order reflects priority.)
331
332 @param transition_set: a list of pairs where the first
333 member is the destination L{State} and the second member is the
334 set of L{UpdateInstruction}s that apply when the automaton
335 transitions to the destination state."""
336
337 self.__transitionSet = []
338 seen = set()
339 for xit in transition_set:
340 if not (xit in seen):
341 seen.add(xit)
342 self.__transitionSet.append(xit)
343
344 - def match (self, symbol):
345 """Return C{True} iff the symbol matches for this state.
346
347 This may be overridden by subclasses when matching by
348 equivalence does not work. Alternatively, if the symbol
349 stored in this node is a subclass of L{SymbolMatch_mixin}, then
350 its match method will be used. Otherwise C{symbol} matches
351 only if it is equal to the L{symbol} of this state.
352
353 @param symbol: A candidate symbol corresponding to the
354 expression symbol for this state.
355
356 @return: C{True} iff C{symbol} is a match for this state.
357 """
358 if isinstance(self.__symbol, SymbolMatch_mixin):
359 return self.__symbol.match(symbol)
360 return self.__symbol == symbol
361
363 return 'S.%x' % (id(self),)
364
365 - def _facText (self):
366 rv = []
367 rv.extend(map(str, self.__transitionSet))
368 if self.__finalUpdate is not None:
369 if 0 == len(self.__finalUpdate):
370 rv.append('Final (no conditions)')
371 else:
372 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate))))
373 return '\n'.join(rv)
374
376 """A counter condition is a range limit on valid counter values.
377
378 Instances of this class serve as keys for the counters that
379 represent the configuration of a FAC. The instance also maintains
380 a pointer to application-specific L{metadata}."""
381
382 __min = None
384 """The minimum legal value for the counter.
385
386 This is a non-negative integer."""
387 return self.__min
388 min = property(__get_min)
389
390 __max = None
392 """The maximum legal value for the counter.
393
394 This is a positive integer, or C{None} to indicate that the
395 counter is unbounded."""
396 return self.__max
397 max = property(__get_max)
398
399 __metadata = None
403 metadata = property(__get_metadata)
404
405 - def __init__ (self, min, max, metadata=None):
406 """Create a counter condition.
407
408 @param min: The value for L{min}
409 @param max: The value for L{max}
410 @param metadata: The value for L{metadata}
411 """
412 self.__min = min
413 self.__max = max
414 self.__metadata = metadata
415
418
424
426 return not self.__eq__(other)
427
429 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
430
432 """An update instruction pairs a counter with a mutation of that
433 counter.
434
435 The instruction is executed during a transition from one state to
436 another, and causes the corresponding counter to be incremented or
437 reset. The instruction may only be applied if doing so does not
438 violate the conditions of the counter it affects."""
439
440 __counterCondition = None
442 """A reference to the L{CounterCondition} identifying the
443 counter to be updated.
444
445 The counter condition instance is used as a key to the
446 dictionary maintaining current counter values."""
447 return self.__counterCondition
448 counterCondition = property(__get_counterCondition)
449
450 __doIncrement = None
452 """C{True} if the counter is to be incremented; C{False} if it is to be reset."""
453 return self.__doIncrement
454 doIncrement = property(__get_doIncrement)
455
456
457 __min = None
458 __max = None
459
460 - def __init__ (self, counter_condition, do_increment):
461 """Create an update instruction.
462
463 @param counter_condition: A L{CounterCondition} identifying a
464 minimum and maximum value for a counter, and serving as a map
465 key for the value of the corresponding counter.
466
467 @param do_increment: C{True} if the update is to increment
468 the value of the counter; C{False} if the update is to reset
469 the counter.
470 """
471 self.__counterCondition = counter_condition
472 self.__doIncrement = not not do_increment
473 self.__min = counter_condition.min
474 self.__max = counter_condition.max
475
477 """Implement a component of definition 5 from B{HOV09}.
478
479 The update instruction is satisfied by the counter values if
480 its action may be legitimately applied to the value of its
481 associated counter.
482
483 @param counter_values: A map from L{CounterCondition}s to
484 non-negative integers
485
486 @return: C{True} or C{False}
487 """
488 value = counter_values[self.__counterCondition]
489 if self.__doIncrement \
490 and (self.__max is not None) \
491 and (value >= self.__max):
492 return False
493 if (not self.__doIncrement) \
494 and (value < self.__min):
495 return False
496 return True
497
498 @classmethod
499 - def Satisfies (cls, counter_values, update_instructions):
500 """Return C{True} iff the counter values satisfy the update
501 instructions.
502
503 @param counter_values: A map from L{CounterCondition} to
504 integer counter values
505
506 @param update_instructions: A set of L{UpdateInstruction}
507 instances
508
509 @return: C{True} iff all instructions are satisfied by the
510 values and limits."""
511 for psi in update_instructions:
512 if not psi.satisfiedBy(counter_values):
513 return False
514 return True
515
516 - def apply (self, counter_values):
529
530 @classmethod
531 - def Apply (cls, update_instructions, counter_values):
532 """Apply the update instructions to the counter values.
533
534 @param update_instructions: A set of L{UpdateInstruction}
535 instances.
536
537 @param counter_values: A map from L{CounterCondition}
538 instances to non-negative integers. This map is updated
539 in-place by applying each instruction in
540 C{update_instructions}."""
541 for psi in update_instructions:
542 psi.apply(counter_values)
543
546
551
553 return not self.__eq__(other)
554
557
559 """Representation of a FAC state transition."""
560
561 __destination = None
563 """The transition destination state."""
564 return self.__destination
565 destination = property(__get_destination)
566
567 __updateInstructions = None
569 """The set of counter updates that are applied when the transition is taken."""
570 return self.__updateInstructions
571 updateInstructions = property(__get_updateInstructions)
572
573 __nextTransition = None
575 """The next transition to apply in this chain.
576
577 C{None} if this is the last transition in the chain."""
578 return self.__nextTransition
579 nextTransition = property(__get_nextTransition)
580
581 __layerLink = None
583 """A directive relating to changing automaton layer on transition.
584
585 C{None} indicates this transition is from one state to another
586 within a single automaton.
587
588 An instance of L{Configuration} is a transition on completion
589 of a subautomaton back to the configuration in the parent
590 automaton. The L{destination} is the state in the parent automaton.
591
592 An instance of L{Automaton} requires creation of a
593 sub-configuration and initial entry into the automaton. The
594 L{destination} is the state in the sub-automaton.
595 """
596 return self.__layerLink
597 layerLink = property(__get_layerLink)
598
599 - def __init__ (self, destination, update_instructions, layer_link=None):
600 """Create a transition to a state.
601
602 @param destination: the state into which the transition is
603 made
604
605 @param update_instructions: A iterable of L{UpdateInstruction}s
606 denoting the changes that must be made to counters as a
607 consequence of taking the transition.
608
609 @keyword layer_link: The value for L{layerLink}."""
610 self.__destination = destination
611 if not isinstance(update_instructions, list):
612 update_instructions = list(update_instructions)
613 self.__updateInstructions = update_instructions
614 self.__layerLink = layer_link
615
627
629 """Return the L{symbol<State.symbol>} of the L{consumingState}."""
630 return self.consumingState().symbol
631
662
663 - def apply (self, configuration, clone_map=None):
664 """Apply the transitition to a configuration.
665
666 This updates the configuration counter values based on the
667 update instructions, and sets the new configuration state.
668
669 @note: If the transition involves leaving a sub-automaton or
670 creating a new sub-automaton, the returned configuration
671 structure will be different from the one passed in. You
672 should invoke this as::
673
674 cfg = transition.apply(cfg)
675
676 @param configuration: A L{Configuration} of an executing automaton
677
678 @param clone_map: A map from L{Configuration} to
679 L{Configuration} reflecting the replacements made when the
680 configuration for which the transition was calculated was
681 subsequently cloned into the C{configuration} passed into this
682 method. This is only necessary when the transition includes
683 layer transitions.
684
685 @return: The resulting configuration
686 """
687 layer_link = self.__layerLink
688 if isinstance(layer_link, Configuration):
689 if clone_map is not None:
690 layer_link = clone_map[layer_link]
691 configuration = layer_link.leaveAutomaton(configuration)
692 elif isinstance(layer_link, Automaton):
693 configuration = configuration.enterAutomaton(layer_link)
694 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues())
695 configuration._set_state(self.destination, layer_link is None)
696 if self.__nextTransition is None:
697 return configuration
698 return self.__nextTransition.apply(configuration, clone_map)
699
700 - def chainTo (self, next_transition):
701 """Duplicate the state and chain the duplicate to a successor
702 transition.
703
704 This returns a new transition which applies the operation for
705 this transition, then proceeds to apply the next transition in
706 the chain.
707
708 @note: The node that is invoking this must not have successor
709 transitions.
710
711 @param next_transition: A L{Transition} node describing a
712 subsequent transition.
713
714 @return: a clone of this node, augmented with a link to
715 C{next_transition}."""
716 assert not self.__nextTransition
717 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink)
718 head.__nextTransition = next_transition
719 return head
720
731
737
744
746 return not self.__eq__(other)
747
762
764 """Base class for something that represents an L{Automaton} in
765 execution.
766
767 For deterministic automata, this is generally a L{Configuration}
768 which records the current automaton state along with its counter
769 values.
770
771 For non-deterministic automata, this is a L{MultiConfiguration}
772 which records a set of L{Configuration}s."""
773
775 """Return the acceptable L{Symbol}s given the current
776 configuration.
777
778 This method extracts the symbol from all candidate transitions
779 that are permitted based on the current counter values.
780 Because transitions are presented in a preferred order, the
781 symbols are as well."""
782 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
783
784 - def step (self, symbol):
785 """Execute an automaton transition using the given symbol.
786
787 @param symbol: A symbol from the alphabet of the automaton's
788 language. This is a Python value that should be accepted by
789 the L{SymbolMatch_mixin.match} method of a L{State.symbol}.
790 It is not a L{Symbol} instance.
791
792 @return: The new configuration resulting from the step.
793
794 @raises AutomatonStepError: L{UnrecognizedSymbolError}
795 when no transition compatible with C{symbol} is available, and
796 L{NondeterministicSymbolError} if C{symbol} admits multiple
797 transitions and the subclass does not support
798 non-deterministic steps (see L{MultiConfiguration}).
799
800 @warning: If the step entered or left a sub-automaton the
801 return value will not be the configuration that was used to
802 execute the step. The proper pattern for using this method
803 is::
804
805 cfg = cfg.step(sym)
806
807 """
808 raise NotImplementedError('%s.step' % (type(self).__name__,))
809
811 """The state of an L{Automaton} in execution.
812
813 This combines a state node of the automaton with a set of counter
814 values."""
815
816 __state = None
818 """The state of the configuration.
819
820 This is C{None} to indicate an initial state, or one of the underlying automaton's states."""
821 return self.__state
848 state = property(__get_state)
849
850 __counterValues = None
851 """The values of the counters.
852
853 This is a map from the CounterCondition instances of the
854 underlying automaton to integer values."""
857
858 __automaton = None
861 automaton = property(__get_automaton)
862
863 __subConfiguration = None
865 """Reference to configuration being executed in a sub-automaton.
866
867 C{None} if no sub-automaton is active, else a reference to a
868 configuration that is being executed in a sub-automaton.
869
870 Sub-configurations are used to match sub-terms in an
871 L{unordered catenation<All>} term. A configuration may have
872 at most one sub-configuration at a time, and the configuration
873 will be removed and possibly replaced when the term being
874 processed completes."""
875 return self.__subConfiguration
876 subConfiguration = property(__get_subConfiguration)
877
878 __superConfiguration = None
880 """Reference to the configuration for which this is a
881 sub-configuration.
882
883 C{None} if no super-automaton is active, else a reference to a
884 configuration that is being executed in a super-automaton.
885
886 The super-configuration relation persists for the lifetime of
887 the configuration."""
888 return self.__superConfiguration
889 superConfiguration = property(__get_superConfiguration)
890
891 __subAutomata = None
893 """A set of automata that must be satisfied before the current state can complete.
894
895 This is used in unordered catenation. Each sub-automaton
896 represents a term in the catenation. When the configuration
897 enters a state with sub-automata, a set containing references
898 to those automata is assigned to this attribute.
899 Subsequently, until all automata in the state are satisfied,
900 transitions can only occur within an active sub-automaton, out
901 of the active sub-automaton if it is in an accepting state,
902 and into a new sub-automaton if no sub-automaton is active.
903 """
904 return self.__subAutomata
907 subAutomata = property(__get_subAutomata)
908
910 """Create a transition back to the containing configuration.
911
912 This is done when a configuration is in an accepting state and
913 there are candidate transitions to other states that must be
914 considered. The transition does not consume a symbol."""
915 assert self.__superConfiguration is not None
916 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
917
919 """Execute steps to leave a sub-automaton.
920
921 @param sub_configuration: The configuration associated with
922 the automata that has completed.
923
924 @return: C{self}"""
925 assert sub_configuration.__superConfiguration == self
926 self.__subConfiguration = None
927 return self
928
946
949
956
958 """Return list of viable transitions on C{symbol}
959
960 The transitions that are structurally permitted from this
961 state, in order, filtering out those transitions where the
962 update instruction is not satisfied by the configuration
963 counter values and optionally those for which the symbol does
964 not match.
965
966 @param symbol: A symbol through which a transition from this
967 state is intended. A value of C{None} indicates that the set
968 of transitions should ignore the symbol; candidates are still
969 filtered based on the counter state of the configuration.
970
971 @return: A list of L{Transition} instances permitted from the
972 current configuration. If C{symbol} is not C{None},
973 transitions that would not accept the symbol are excluded.
974 Any transition that would require an unsatisfied counter
975 update is also excluded. Non-deterministic automata may
976 result in a lits with multiple members. """
977
978 fac = self.__automaton
979 transitions = []
980 if symbol is None:
981 match_filter = lambda _xit: True
982 else:
983 match_filter = lambda _xit: _xit.consumingState().match(symbol)
984 update_filter = lambda _xit: _xit.satisfiedBy(self)
985
986 if self.__state is None:
987
988 transitions.extend(fac.initialTransitions)
989 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting():
990
991
992 pass
993 else:
994
995
996 include_local = True
997 if self.__subAutomata:
998
999
1000
1001 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata)
1002 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial))
1003 if include_local:
1004
1005 for xit in filter(update_filter, self.__state.transitionSet):
1006 if xit.consumingState() is not None:
1007 transitions.append(xit)
1008 else:
1009
1010
1011
1012
1013 (_, sub_initial) = xit.destination.subAutomataInitialTransitions()
1014 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial))
1015 if (self.__superConfiguration is not None) and self.isAccepting():
1016
1017 lxit = self.makeLeaveAutomatonTransition()
1018 supxit = self.__superConfiguration.candidateTransitions(symbol)
1019 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit))
1020 assert len(frozenset(transitions)) == len(transitions)
1021 return filter(update_filter, filter(match_filter, transitions))
1022
1025
1026 - def step (self, symbol):
1033
1035 """Return C{True} iff no transitions have ever been made."""
1036 return self.__state is None
1037
1052
1053 - def __init__ (self, automaton, super_configuration=None):
1057
1058 - def clone (self, clone_map=None):
1059 """Clone a configuration and its descendents.
1060
1061 This is used for parallel execution where a configuration has
1062 multiple candidate transitions and must follow all of them.
1063 It clones the entire chain of configurations through
1064 multiple layers.
1065
1066 @param clone_map: Optional map into which the translation from
1067 the original configuration object to the corresponding cloned
1068 configuration object can be reconstructed, e.g. when applying
1069 a transition that includes automata exits referencing
1070 superconfigurations from the original configuration.
1071 """
1072 if clone_map is None:
1073 clone_map = {}
1074 root = self
1075 while root.__superConfiguration is not None:
1076 root = root.__superConfiguration
1077 root = root._clone(clone_map, None)
1078 return clone_map.get(self)
1079
1080 - def _clone (self, clone_map, super_configuration):
1092
1095
1097 """Support parallel execution of state machine.
1098
1099 This holds a set of configurations, and executes each transition
1100 on each one. Configurations which fail to accept a step are
1101 silently dropped; only if this results in no remaining
1102 configurations will L{UnrecognizedSymbolError} be raised. If a
1103 step admits multiple valid transitions, a configuration is added
1104 for each one.
1105
1106 See L{pyxb.binding.content.AutomatonConfiguration} for an
1107 alternative solution which holds actions associated with the
1108 transition until the non-determinism is resolved."""
1109
1110 __configurations = None
1111
1114
1120
1121 - def step (self, symbol):
1122 next_configs = []
1123 for cfg in self.__configurations:
1124 transitions = cfg.candidateTransitions(symbol)
1125 if 0 == len(transitions):
1126 pass
1127 elif 1 == len(transitions):
1128 next_configs.append(transitions[0].apply(cfg))
1129 else:
1130 for transition in transitions:
1131 clone_map = {}
1132 ccfg = cfg.clone(clone_map)
1133 next_configs.append(transition.apply(ccfg, clone_map))
1134 if 0 == len(next_configs):
1135 raise UnrecognizedSymbolError(self, symbol)
1136 assert len(frozenset(next_configs)) == len(next_configs)
1137 self.__configurations = next_configs
1138 return self
1139
1141 """Return the set of configurations that are in an accepting state.
1142
1143 Note that some of the configurations may be within a
1144 sub-automaton; their presence in the return value is because
1145 the root configuration is also accepting."""
1146 accepting = []
1147 for cfg in self.__configurations:
1148 rcfg = cfg
1149
1150
1151 while rcfg.superConfiguration is not None:
1152 rcfg = rcfg.superConfiguration
1153 if rcfg.isAccepting():
1154 accepting.append(cfg)
1155 return accepting
1156
1158 """Representation of a Finite Automaton with Counters.
1159
1160 This has all the standard FAC elements, plus links to other
1161 states/automata as required to support the nested automata
1162 construct used for matching unordered catenation terms."""
1163 __states = None
1165 """The set of L{State}s in the automaton.
1166
1167 These correspond essentially to marked symbols in the original
1168 regular expression, or L{element
1169 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in
1170 an XML schema.
1171
1172 @note: These are conceptually a set and are stored that way.
1173 When an L{Automaton} is constructed the incoming states should
1174 be passed as a list so the calculated initial transitions are
1175 executed in a deterministic order."""
1176 return self.__states
1177 states = property(__get_states)
1178
1179 __counterConditions = None
1181 """The set of L{CounterCondition}s in the automaton.
1182
1183 These are marked positions in the regular expression, or
1184 L{particles<pyxb.xmlschema.structures.Particle>} in an XML
1185 schema, paired with their occurrence constraints."""
1186 return self.__counterConditions
1187 counterConditions = property(__get_counterConditions)
1188
1189 __nullable = None
1191 """C{True} iff the automaton accepts the empty string."""
1192 return self.__nullable
1193 nullable = property(__get_nullable)
1194
1195 __initialTransitions = None
1197 """The set of transitions that may be made to enter the automaton.
1198
1199 These are full transitions, including chains into subautomata
1200 if an initial state represents a node with sub-automata.
1201
1202 @note: As with L{State.transitionSet}, the set is represented
1203 as a list to preserve priority when resolving
1204 non-deterministic matches."""
1205 return self.__initialTransitions
1206 initialTransitions = property(__get_initialTransitions)
1207
1208 __containingState = None
1210 """The L{State} instance for which this is a sub-automaton.
1211
1212 C{None} if this is not a sub-automaton."""
1213 return self.__containingState
1214 containingState = property(__get_containingState)
1215
1216 __finalStates = None
1218 """The set of L{State} members which can terminate a match."""
1219 return self.__finalStates
1220 finalStates = property(__get_finalStates)
1221
1222 - def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1240
1242 """Return a new L{Configuration} instance for this automaton."""
1243 return Configuration(self)
1244
1260
1261 -class Node (object):
1262 """Abstract class for any node in the term tree.
1263
1264 In its original form a B{position} (C{pos}) is a tuple of
1265 non-negative integers comprising a path from a node in the term
1266 tree. It identifies a node in the tree. After the FAC has been
1267 constructed, only positions that are leaf nodes in the term tree
1268 remain, and the corresponding symbol value (Python instance) is
1269 used as the position.
1270
1271 An B{update instruction} (C{psi}) is a map from positions to
1272 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions
1273 to be taken on the counter states corresponding to the positions
1274 in its domain.
1275
1276 A B{transition} is a pair containing a position and an update
1277 instruction. It identifies a potential next node in the state and
1278 the updates that are to be performed if the transition is taken.
1279
1280 A B{follow value} is a map from a position to a set of transitions
1281 that may originate from the pos. This set is represented as a
1282 Python list since update instructions are dicts and cannot be
1283 hashed.
1284 """
1285
1286 _Precedence = None
1287 """An integral value used for parenthesizing expressions.
1288
1289 A subterm that has a precedence less than that of its containing
1290 term must be enclosed in parentheses when forming a text
1291 expression representing the containing term."""
1292
1293 RESET = False
1294 """An arbitrary value representing reset of a counter."""
1295
1296 INCREMENT = True
1297 """An arbitrary value representing increment of a counter."""
1298
1300 """Create a FAC term-tree node.
1301
1302 @keyword metadata: Any application-specific metadata retained in
1303 the term tree for transfer to the resulting automaton."""
1304 self.__metadata = kw.get('metadata')
1305
1306 - def clone (self, *args, **kw):
1307 """Create a deep copy of the node.
1308
1309 All term-tree--related attributes and properties are replaced
1310 with deep clones. Other attributes are preserved.
1311
1312 @param args: A tuple of arguments to be passed to the instance
1313 constructor.
1314
1315 @param kw: A dict of keywords to be passed to the instance
1316 constructor.
1317
1318 @note: Subclasses should pre-extend this method to augment the
1319 C{args} and C{kw} parameters as necessary to match the
1320 expectations of the C{__init__} method of the class being
1321 cloned."""
1322 kw.setdefault('metadata', self.metadata)
1323 return type(self)(*args, **kw)
1324
1325 __metadata = None
1329 metadata = property(__get_metadata)
1330
1331 __first = None
1333 """The I{first} set for the node.
1334
1335 This is the set of positions leading to symbols that can
1336 appear first in a string matched by an execution starting at
1337 the node."""
1338 if self.__first is None:
1339 self.__first = frozenset(self._first())
1340 return self.__first
1341 first = property(__get_first)
1342
1344 """Abstract method that defines L{first} for the subclass.
1345
1346 The return value should be an iterable of tuples of integers
1347 denoting paths from this node through the term tree to a
1348 symbol."""
1349 raise NotImplementedError('%s.first' % (type(self).__name__,))
1350
1351 __last = None
1353 """The I{last} set for the node.
1354
1355 This is the set of positions leading to symbols that can
1356 appear last in a string matched by an execution starting at
1357 the node."""
1358 if self.__last is None:
1359 self.__last = frozenset(self._last())
1360 return self.__last
1361 last = property(__get_last)
1362
1364 """Abstract method that defines L{last} for the subclass.
1365
1366 The return value should be an iterable of tuples of integers
1367 denoting paths from this node through the term tree to a
1368 symbol."""
1369 raise NotImplementedError('%s.last' % (type(self).__name__,))
1370
1371 __nullable = None
1377 nullable = property(__get_nullable)
1378
1380 """Abstract method that defines L{nullable} for the subclass.
1381
1382 The return value should be C{True} or C{False}."""
1383 raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1384
1385 __follow = None
1391 follow = property(__get_follow)
1392
1394 """Abstract method that defines L{follow} for the subclass.
1395
1396 The return value should be a map from tuples of integers (positions)
1397 to a list of transitions, where a transition is a position and
1398 an update instruction."""
1399 raise NotImplementedError('%s.follow' % (type(self).__name__,))
1400
1402 """Reset any term-tree state associated with the node.
1403
1404 Any change to the structure of the term tree in which the node
1405 appears invalidates memoized first/follow sets and related
1406 information. This method clears all that data so it can be
1407 recalculated. It does not clear the L{metadata} link, or any
1408 existing structural data."""
1409 self.__first = None
1410 self.__last = None
1411 self.__nullable = None
1412 self.__follow = None
1413 self.__counterPositions = None
1414
1416 """Utility function for term tree processing.
1417
1418 @param pre: a callable that, unless C{None}, is invoked at
1419 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where
1420 C{pos} is the tuple of integers identifying the path from the
1421 node at on which this method was invoked to the node being
1422 processed. The invocation occurs before processing any
1423 subordinate nodes.
1424
1425 @param post: as with C{pre} but invocation occurs after
1426 processing any subordinate nodes.
1427
1428 @param arg: a value passed to invocations of C{pre} and
1429 C{post}."""
1430 self._walkTermTree((), pre, post, arg)
1431
1433 """Abstract method implementing L{walkTermTree} for the subclass."""
1434 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1435
1436 __posNodeMap = None
1438 """A map from positions to nodes in the term tree."""
1439 if self.__posNodeMap is None:
1440 pnm = { }
1441 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm)
1442 self.__posNodeMap = pnm
1443 return self.__posNodeMap
1444 posNodeMap = property(__get_posNodeMap)
1445
1446 __nodePosMap = None
1455 nodePosMap = property(__get_nodePosMap)
1456
1457 @classmethod
1459 """Implement definition 11.1 in B{HOV09}."""
1460 return frozenset([ pos + _mp for _mp in pos_set ])
1461
1462 @classmethod
1464 """Implement definition 11.2 in B{HOV09}"""
1465 rv = {}
1466 for (q, v) in psi.iteritems():
1467 rv[pos + q] = v
1468 return rv
1469
1470 @classmethod
1472 """Implement definition 11.3 in B{HOV09}"""
1473 ts = []
1474 for (q, psi) in transition_set:
1475 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) ))
1476 return ts
1477
1483
1485
1486
1487
1488 self.walkTermTree(self.__resetAndValidate, None, set())
1489
1490 counter_map = { }
1491 for pos in self.counterPositions:
1492 nci = self.posNodeMap.get(pos)
1493 assert isinstance(nci, NumericalConstraint)
1494 assert nci not in counter_map
1495 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata)
1496 counters = counter_map.values()
1497
1498 state_map = { }
1499 for pos in self.follow.iterkeys():
1500 sym = self.posNodeMap.get(pos)
1501 assert isinstance(sym, LeafNode)
1502 assert sym not in state_map
1503
1504
1505
1506 is_initial = pos in self.first
1507
1508
1509
1510 final_update = None
1511 if (() == pos and sym.nullable) or (pos in self.last):
1512
1513
1514
1515 final_update = set()
1516 for nci in map(counter_map.get, self.counterSubPositions(pos)):
1517 final_update.add(UpdateInstruction(nci, False))
1518 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All))
1519 if isinstance(sym, All):
1520 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms))
1521 states = state_map.values()
1522
1523 for (spos, transition_set) in self.follow.iteritems():
1524 src = state_map[spos]
1525 phi = []
1526 for (dpos, psi) in transition_set:
1527 dst = state_map[dpos]
1528 uiset = set()
1529 for (counter, action) in psi.iteritems():
1530 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action))
1531 phi.append(Transition(dst, uiset))
1532 src._set_transitionSet(phi)
1533
1534 return Automaton(states, counters, self.nullable, containing_state=containing_state)
1535
1536 __counterPositions = None
1538 """Implement definition 13.1 from B{HOV09}.
1539
1540 The return value is the set of all positions leading to
1541 L{NumericalConstraint} nodes for which either the minimum
1542 value is not 1 or the maximum value is not unbounded."""
1543 if self.__counterPositions is None:
1544 cpos = []
1545 self.walkTermTree(lambda _n,_p,_a: \
1546 isinstance(_n, NumericalConstraint) \
1547 and ((1 != _n.min) \
1548 or (_n.max is not None)) \
1549 and _a.append(_p),
1550 None, cpos)
1551 self.__counterPositions = frozenset(cpos)
1552 return self.__counterPositions
1553 counterPositions = property(__get_counterPositions)
1554
1556 """Implement definition 13.2 from B{HOV09}.
1557
1558 This is the subset of L{counterPositions} that occur along the
1559 path to C{pos}."""
1560 rv = set()
1561 for cpos in self.counterPositions:
1562 if cpos == pos[:len(cpos)]:
1563 rv.add(cpos)
1564 return frozenset(rv)
1565
1567 """Obtain a description of the FAC in text format.
1568
1569 This is a diagnostic tool, returning first, last, and follow
1570 maps using positions."""
1571 rv = []
1572 rv.append('r\t= %s' % (str(self),))
1573 states = list(self.follow.iterkeys())
1574 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states)))))
1575 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first))))
1576 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last))))
1577 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions))))
1578 for pos in self.first:
1579 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos)))
1580 for spos in states:
1581 for (dpos, transition_set) in self.follow[spos]:
1582 dst = self.posNodeMap[dpos]
1583 uv = []
1584 for (c, u) in transition_set.iteritems():
1585 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c)))
1586 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv)))
1587 return '\n'.join(rv)
1588
1590 """Intermediary for nodes that have multiple child nodes."""
1591
1592 __terms = None
1594 """The set of subordinate terms of the current node."""
1595 return self.__terms
1596 terms = property(__get_terms)
1597
1599 """Term that collects an ordered sequence of terms.
1600
1601 The terms are provided as arguments. All must be instances of
1602 a subclass of L{Node}."""
1603 super(MultiTermNode, self).__init__(**kw)
1604 self.__terms = terms
1605
1609
1611 if pre is not None:
1612 pre(self, position, arg)
1613 for c in xrange(len(self.__terms)):
1614 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg)
1615 if post is not None:
1616 post(self, position, arg)
1617
1619 """Intermediary for nodes that have no child nodes."""
1627 return { (): frozenset() }
1628
1630 if pre is not None:
1631 pre(self, position, arg)
1632 if post is not None:
1633 post(self, position, arg)
1634
1636 """A term with a numeric range constraint.
1637
1638 This corresponds to a "particle" in the XML Schema content model."""
1639
1640 _Precedence = -1
1641
1642 __min = None
1645 min = property(__get_min)
1646
1647 __max = None
1650 max = property(__get_max)
1651
1652 __term = None
1655 term = property(__get_term)
1656
1657 - def __init__ (self, term, min=0, max=1, **kw):
1658 """Term with a numerical constraint.
1659
1660 @param term: A term, the number of appearances of which is
1661 constrained in this term.
1662 @type term: L{Node}
1663
1664 @keyword min: The minimum number of occurrences of C{term}.
1665 The value must be non-negative.
1666
1667 @keyword max: The maximum number of occurrences of C{term}.
1668 The value must be positive (in which case it must also be no
1669 smaller than C{min}), or C{None} to indicate an unbounded
1670 number of occurrences."""
1671 super(NumericalConstraint, self).__init__(**kw)
1672 self.__term = term
1673 self.__min = min
1674 self.__max = max
1675
1678
1680 return [ (0,) + _fc for _fc in self.__term.first ]
1681
1683 return [ (0,) + _lc for _lc in self.__term.last ]
1684
1687
1706
1708 if pre is not None:
1709 pre(self, position, arg)
1710 self.__term._walkTermTree(position + (0,), pre, post, arg)
1711 if post is not None:
1712 post(self, position, arg)
1713
1722
1723 -class Choice (MultiTermNode):
1724 """A term that may be any one of a set of terms.
1725
1726 This term matches if any one of its contained terms matches."""
1727
1728 _Precedence = -3
1729
1731 """Term that selects one of a set of terms.
1732
1733 The terms are provided as arguments. All must be instances of
1734 a subclass of L{Node}."""
1735 super(Choice, self).__init__(*terms, **kw)
1736
1742
1748
1750 for t in self.terms:
1751 if t.nullable:
1752 return True
1753 return False
1754
1762
1771
1773 """A term that is an ordered sequence of terms."""
1774
1775 _Precedence = -2
1776
1778 """Term that collects an ordered sequence of terms.
1779
1780 The terms are provided as arguments. All must be instances of
1781 a subclass of L{Node}."""
1782 super(Sequence, self).__init__(*terms, **kw)
1783
1785 rv = set()
1786 c = 0
1787 while c < len(self.terms):
1788 t = self.terms[c]
1789 rv.update([ (c,) + _fc for _fc in t.first])
1790 if not t.nullable:
1791 break
1792 c += 1
1793 return rv
1794
1796 rv = set()
1797 c = len(self.terms) - 1
1798 while 0 <= c:
1799 t = self.terms[c]
1800 rv.update([ (c,) + _lc for _lc in t.last])
1801 if not t.nullable:
1802 break
1803 c -= 1
1804 return rv
1805
1807 for t in self.terms:
1808 if not t.nullable:
1809 return False
1810 return True
1811
1813 rv = {}
1814 for c in xrange(len(self.terms)):
1815 pp = (c,)
1816 for (q, transition_set) in self.terms[c].follow.iteritems():
1817 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set)
1818 for c in xrange(len(self.terms)-1):
1819 t = self.terms[c]
1820 pp = (c,)
1821
1822
1823
1824 for q in t.last:
1825 psi = {}
1826 for p1 in t.counterSubPositions(q):
1827 psi[pp + p1] = self.RESET
1828 nc = c
1829 while nc+1 < len(self.terms):
1830 nc += 1
1831 nt = self.terms[nc]
1832 for sq1 in nt.first:
1833 q1 = (nc,) + sq1
1834 rv[pp+q].append((q1, psi))
1835 if not nt.nullable:
1836 break
1837 return rv
1838
1847
1848 -class All (MultiTermNode, LeafNode):
1849 """A term that is an unordered sequence of terms.
1850
1851 Note that the inheritance structure for this node is unusual. It
1852 has multiple children when it is treated as a term tree, but is
1853 considered a leaf node when constructing an automaton.
1854 """
1855
1856 _Precedence = 0
1857
1859 """Term that collects an unordered sequence of terms.
1860
1861 The terms are provided as arguments. All must be instances of
1862 a subclass of L{Node}."""
1863 super(All, self).__init__(*terms, **kw)
1864
1866 for t in self.terms:
1867 if not t.nullable:
1868 return False
1869 return True
1870
1871 @classmethod
1873 """Create a term tree that implements unordered catenation of
1874 the terms.
1875
1876 This expansion results in a standard choice/sequence term
1877 tree, at the cost of quadratic state expansion because terms
1878 are L{cloned<Node.clone>} as required to satisfy the tree
1879 requirements of the term tree.
1880
1881 @param terms: The tuple of terms that are elements of an
1882 accepted sequence.
1883
1884 @return: A term tree comprising a choice between sequences
1885 that connect each term to the unordered catenation of the
1886 remaining terms."""
1887 if 1 == len(terms):
1888 return terms[0]
1889 disjuncts = []
1890 for i in xrange(len(terms)):
1891 n = terms[i]
1892 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:])
1893 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem)))
1894 return Choice(*disjuncts)
1895
1897 return u'&(' + ','.join([str(_t) for _t in self.terms]) + ')'
1898
1900 """A leaf term that is a symbol.
1901
1902 The symbol is represented by the L{metadata} field."""
1903
1904 _Precedence = 0
1905
1909
1912
1915