1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module provides Finite Automata with Counters.
17
18 FACs are type of state machine where a transition may include a
19 constraint and a modification to a set of counters. They are used to
20 implement regular expressions with numerical constraints, as are found
21 in POSIX regexp, Perl, and XML schema.
22
23 The implementation here derives from U{Regular Expressions with
24 Numerical Constraints and Automata with Counters
25 <https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>},
26 Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684,
27 Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245. In what
28 follows, this reference will be denoted B{HOV09}.
29
30 A regular expression is directly translated into a term tree, where
31 nodes are operators such as sequence, choice, and counter
32 restrictions, and the leaf nodes denote symbols in the language of the
33 regular expression.
34
35 In the case of XML content models, the symbols include L{element
36 declarations <pyxb.xmlschema.structures.ElementDeclaration>} and
37 L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}. A
38 numerical constraint node corresponds to an L{XML particle
39 <pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes
40 derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of
41 types B{choice} and B{sequence}. As suggested in U{The Membership
42 Problem for Regular Expressions with Unordered Concatenation and
43 Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>}
44 the B{all} content model can be translated into state machine using
45 choice and sequence at the cost of a quadratic size explosion. Since
46 some XML content models might have a hundred terms in an unordered
47 catenation, this is not acceptable, and the implementation here
48 optimizes this construct by creating a leaf node in the automaton
49 which in turn contains sub-automata for each term, and permits an exit
50 transition only when all the terms that are required have been
51 completed.
52
53 @note: In XSD 1.1 the restriction that terms in an B{all} model group
54 occur at most once has been removed. Since the current implementation
55 removes a completed term from the set of available terms, this will
56 not work: instead the subconfiguration with its counter values must be
57 retained between matches.
58 """
59
60 import operator
61 import logging
62
63 log_ = logging.getLogger(__name__)
67
69 """Exception raised when a FAC term tree is not a tree.
70
71 For example, a L{Symbol} node appears multiple times, or a cycle is detected."""
72
73 parent = None
74 """The L{MultiTermNode} containing the term that proves invalidity"""
75
76 term = None
77 """The L{Node} that proves invalidity"""
78
82
84 """Exception raised when an unsatisfied update instruction is executed.
85
86 This indicates an internal error in the implementation."""
87
88 update_instruction = None
89 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}"""
90
91 values = None
92 """The unsatisfying value map from L{CounterCondition} instances to integers"""
93
97
99 """Symbol rejected by L{Configuration_ABC.step}.
100
101 The exception indicates that the proposed symbol either failed to
102 produce a transition (L{UnrecognizedSymbolError}) or produced
103 multiple equally valid transitions
104 (L{NondeterministicSymbolError})."""
105
106 configuration = None
107 """The instance of L{Configuration_ABC} that raised the exception.
108 From L{Configuration_ABC.acceptableSymbols} you can determine what
109 alternatives might be present."""
110
111 symbol = None
112 """The symbol that was not accepted."""
113
117 acceptable = property(__get_acceptable)
118
122
124 """L{Configuration.step} failed to find a valid transition."""
125 pass
126
128 """L{Configuration.step} found multiple transitions."""
129 pass
130
132 """Mix-in used by symbols to provide a custom match implementation.
133
134 If a L{State.symbol} value is an instance of this mix-in, then it
135 will be used to validate a candidate symbol for a match."""
136
137 - def match (self, symbol):
138 raise NotImplementedError('%s.match' % (type(self).__name__,))
139
141 """A thin wrapper around an object reference.
142
143 The state of the automaton corresponds to a position, or marked
144 symbol, in the term tree. Because the same symbol may appear at
145 multiple locations in the tree, and the distinction between these
146 positions is critical, a L{State} wrapper is provided to maintain
147 distinct values."""
148
149 - def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
150 """Create a FAC state.
151
152 @param symbol: The symbol associated with the state.
153 Normally initialized from the L{Symbol.metadata} value. The
154 state may be entered if, among other conditions, the L{match}
155 routine accepts the proposed input as being consistent with
156 this value.
157
158 @param is_initial: C{True} iff this state may serve as the
159 first state of the automaton.
160
161 @param final_update: C{None} if this state is not an
162 accepting state of the automaton; otherwise a set of
163 L{UpdateInstruction} values that must be satisfied by the
164 counter values in a configuration as a further restriction of
165 acceptance.
166
167 @param is_unordered_catenation: C{True} if this state has
168 subautomata that must be matched to execute the unordered
169 catenation of an L{All} node; C{False} if this is a regular
170 symbol."""
171 self.__symbol = symbol
172 self.__isInitial = not not is_initial
173 self.__finalUpdate = final_update
174 self.__isUnorderedCatenation = is_unordered_catenation
175
176 __automaton = None
178 """Link to the L{Automaton} to which the state belongs."""
179 return self.__automaton
181 """Method invoked during automaton construction to set state owner."""
182 assert self.__automaton is None
183 self.__automaton = automaton
184 return self
185 automaton = property(__get_automaton)
186
187 __symbol = None
189 """Application-specific metadata identifying the symbol.
190
191 See also L{match}."""
192 return self.__symbol
193 symbol = property(__get_symbol)
194
195 __isUnorderedCatenation = None
197 """Indicate whether the state has subautomata for unordered
198 catenation.
199
200 To reduce state explosion due to non-determinism, such a state
201 executes internal transitions in subautomata until all terms
202 have matched or a failure is discovered."""
203 return self.__isUnorderedCatenation
204 isUnorderedCatenation = property(__get_isUnorderedCatenation)
205
206 __subAutomata = None
208 """A sequence of sub-automata supporting internal state transitions.
209
210 This will return C{None} unless L{isUnorderedCatenation} is C{True}."""
211 return self.__subAutomata
216 subAutomata = property(__get_subAutomata)
217
218 __isInitial = None
220 """C{True} iff this state may be the first state the automaton enters."""
221 return self.__isInitial
222 isInitial = property(__get_isInitial)
223
224 __automatonEntryTransitions = None
226 """Return the set of initial transitions allowing entry to the automata through this state.
227
228 These are structurally-permitted transitions only, and must be
229 filtered based on the symbol that might trigger the
230 transition. The results are not filtered based on counter
231 value, since this value is used to determine how the
232 containing automaton might be entered. Consequently the
233 return value is the empty set unless this is an initial state.
234
235 The returned set is closed under entry to sub-automata,
236 i.e. it is guaranteed that each transition includes a
237 consuming state even if it requires a multi-element chain of
238 transitions into subautomata to reach one."""
239 if self.__automatonEntryTransitions is None:
240 transitions = []
241 if self.__isInitial:
242 xit = Transition(self, set())
243 if self.__subAutomata is None:
244 transitions.append(xit)
245 else:
246 for sa in self.__subAutomata:
247 for saxit in sa.initialTransitions:
248 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition()))
249 self.__automatonEntryTransitions = transitions
250 return self.__automatonEntryTransitions
251 automatonEntryTransitions = property(__get_automatonEntryTransitions)
252
253 __finalUpdate = None
255 """Return the update instructions that must be satisfied for this to be a final state."""
256 return self.__finalUpdate
257 finalUpdate = property(__get_finalUpdate)
258
260 """Return the set of candidate transitions to enter a sub-automaton of this state.
261
262 @param sub_automata: A subset of the sub-automata of this
263 state which should contribute to the result. If C{None}, all
264 sub-automata are used.
265
266 @return: A pair C{(nullable, transitions)} where C{nullable}
267 is C{True} iff there is at least one sub-automaton that is in
268 an accepting state on entry, and C{transitions} is a list of
269 L{Transition} instances describing how to reach some state in
270 a sub-automaton via a consumed symbol.
271 """
272 assert self.__subAutomata is not None
273 is_nullable = True
274 transitions = []
275 if sub_automata is None:
276 sub_automata = self.__subAutomata
277 for sa in sub_automata:
278 if not sa.nullable:
279 is_nullable = False
280 transitions.extend(sa.initialTransitions)
281 return (is_nullable, transitions)
282
284 """C{True} iff this state is an accepting state for the automaton.
285
286 @param counter_values: Counter values that further validate
287 whether the requirements of the automaton have been met.
288
289 @return: C{True} if this is an accepting state and the
290 counter values relevant at it are satisfied."""
291 if self.__finalUpdate is None:
292 return False
293 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
294
295 __transitionSet = None
297 """Definitions of viable transitions from this state.
298
299 The transition set of a state is a set of L{Transition} nodes
300 identifying a state reachable in a single step from this
301 state, and a set of counter updates that must apply if the
302 transition is taken.
303
304 These transitions may not in themselves consume a symbol. For
305 example, if the destination state represents a match of an
306 L{unordered catenation of terms<All>}, then secondary
307 processing must be done to traverse into the automata for
308 those terms and identify transitions that include a symbol
309 consumption.
310
311 @note: Although conceptually the viable transitions are a set,
312 this implementation maintains them in a list so that order is
313 preserved when automata processing becomes non-deterministic.
314 PyXB is careful to build the transition list so that the
315 states are attempted in the order in which they appear in the
316 schema that define the automata.
317 """
318 return self.__transitionSet
319 transitionSet = property(__get_transitionSet)
320
322 """Method invoked during automaton construction to set the
323 legal transitions from the state.
324
325 The set of transitions cannot be defined until all states that
326 appear in it are available, so the creation of the automaton
327 requires that the association of the transition set be
328 delayed. (Though described as a set, the transitions are a
329 list where order reflects priority.)
330
331 @param transition_set: a list of pairs where the first
332 member is the destination L{State} and the second member is the
333 set of L{UpdateInstruction}s that apply when the automaton
334 transitions to the destination state."""
335
336 self.__transitionSet = []
337 seen = set()
338 for xit in transition_set:
339 if not (xit in seen):
340 seen.add(xit)
341 self.__transitionSet.append(xit)
342
343 - def match (self, symbol):
344 """Return C{True} iff the symbol matches for this state.
345
346 This may be overridden by subclasses when matching by
347 equivalence does not work. Alternatively, if the symbol
348 stored in this node is a subclass of L{SymbolMatch_mixin}, then
349 its match method will be used. Otherwise C{symbol} matches
350 only if it is equal to the L{symbol} of this state.
351
352 @param symbol: A candidate symbol corresponding to the
353 expression symbol for this state.
354
355 @return: C{True} iff C{symbol} is a match for this state.
356 """
357 if isinstance(self.__symbol, SymbolMatch_mixin):
358 return self.__symbol.match(symbol)
359 return self.__symbol == symbol
360
362 return 'S.%x' % (id(self),)
363
364 - def _facText (self):
365 rv = []
366 rv.extend(map(str, self.__transitionSet))
367 if self.__finalUpdate is not None:
368 if 0 == len(self.__finalUpdate):
369 rv.append('Final (no conditions)')
370 else:
371 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate))))
372 return '\n'.join(rv)
373
375 """A counter condition is a range limit on valid counter values.
376
377 Instances of this class serve as keys for the counters that
378 represent the configuration of a FAC. The instance also maintains
379 a pointer to application-specific L{metadata}."""
380
381 __min = None
383 """The minimum legal value for the counter.
384
385 This is a non-negative integer."""
386 return self.__min
387 min = property(__get_min)
388
389 __max = None
391 """The maximum legal value for the counter.
392
393 This is a positive integer, or C{None} to indicate that the
394 counter is unbounded."""
395 return self.__max
396 max = property(__get_max)
397
398 __metadata = None
402 metadata = property(__get_metadata)
403
404 - def __init__ (self, min, max, metadata=None):
405 """Create a counter condition.
406
407 @param min: The value for L{min}
408 @param max: The value for L{max}
409 @param metadata: The value for L{metadata}
410 """
411 self.__min = min
412 self.__max = max
413 self.__metadata = metadata
414
417
423
425 return not self.__eq__(other)
426
428 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
429
431 """An update instruction pairs a counter with a mutation of that
432 counter.
433
434 The instruction is executed during a transition from one state to
435 another, and causes the corresponding counter to be incremented or
436 reset. The instruction may only be applied if doing so does not
437 violate the conditions of the counter it affects."""
438
439 __counterCondition = None
441 """A reference to the L{CounterCondition} identifying the
442 counter to be updated.
443
444 The counter condition instance is used as a key to the
445 dictionary maintaining current counter values."""
446 return self.__counterCondition
447 counterCondition = property(__get_counterCondition)
448
449 __doIncrement = None
451 """C{True} if the counter is to be incremented; C{False} if it is to be reset."""
452 return self.__doIncrement
453 doIncrement = property(__get_doIncrement)
454
455
456 __min = None
457 __max = None
458
459 - def __init__ (self, counter_condition, do_increment):
460 """Create an update instruction.
461
462 @param counter_condition: A L{CounterCondition} identifying a
463 minimum and maximum value for a counter, and serving as a map
464 key for the value of the corresponding counter.
465
466 @param do_increment: C{True} if the update is to increment
467 the value of the counter; C{False} if the update is to reset
468 the counter.
469 """
470 self.__counterCondition = counter_condition
471 self.__doIncrement = not not do_increment
472 self.__min = counter_condition.min
473 self.__max = counter_condition.max
474
476 """Implement a component of definition 5 from B{HOV09}.
477
478 The update instruction is satisfied by the counter values if
479 its action may be legitimately applied to the value of its
480 associated counter.
481
482 @param counter_values: A map from L{CounterCondition}s to
483 non-negative integers
484
485 @return: C{True} or C{False}
486 """
487 value = counter_values[self.__counterCondition]
488 if self.__doIncrement \
489 and (self.__max is not None) \
490 and (value >= self.__max):
491 return False
492 if (not self.__doIncrement) \
493 and (value < self.__min):
494 return False
495 return True
496
497 @classmethod
498 - def Satisfies (cls, counter_values, update_instructions):
499 """Return C{True} iff the counter values satisfy the update
500 instructions.
501
502 @param counter_values: A map from L{CounterCondition} to
503 integer counter values
504
505 @param update_instructions: A set of L{UpdateInstruction}
506 instances
507
508 @return: C{True} iff all instructions are satisfied by the
509 values and limits."""
510 for psi in update_instructions:
511 if not psi.satisfiedBy(counter_values):
512 return False
513 return True
514
515 - def apply (self, counter_values):
528
529 @classmethod
530 - def Apply (cls, update_instructions, counter_values):
531 """Apply the update instructions to the counter values.
532
533 @param update_instructions: A set of L{UpdateInstruction}
534 instances.
535
536 @param counter_values: A map from L{CounterCondition}
537 instances to non-negative integers. This map is updated
538 in-place by applying each instruction in
539 C{update_instructions}."""
540 for psi in update_instructions:
541 psi.apply(counter_values)
542
545
550
552 return not self.__eq__(other)
553
556
558 """Representation of a FAC state transition."""
559
560 __destination = None
562 """The transition destination state."""
563 return self.__destination
564 destination = property(__get_destination)
565
566 __updateInstructions = None
568 """The set of counter updates that are applied when the transition is taken."""
569 return self.__updateInstructions
570 updateInstructions = property(__get_updateInstructions)
571
572 __nextTransition = None
574 """The next transition to apply in this chain.
575
576 C{None} if this is the last transition in the chain."""
577 return self.__nextTransition
578 nextTransition = property(__get_nextTransition)
579
580 __layerLink = None
582 """A directive relating to changing automaton layer on transition.
583
584 C{None} indicates this transition is from one state to another
585 within a single automaton.
586
587 An instance of L{Configuration} is a transition on completion
588 of a subautomaton back to the configuration in the parent
589 automaton. The L{destination} is the state in the parent automaton.
590
591 An instance of L{Automaton} requires creation of a
592 sub-configuration and initial entry into the automaton. The
593 L{destination} is the state in the sub-automaton.
594 """
595 return self.__layerLink
596 layerLink = property(__get_layerLink)
597
598 - def __init__ (self, destination, update_instructions, layer_link=None):
599 """Create a transition to a state.
600
601 @param destination: the state into which the transition is
602 made
603
604 @param update_instructions: A iterable of L{UpdateInstruction}s
605 denoting the changes that must be made to counters as a
606 consequence of taking the transition.
607
608 @keyword layer_link: The value for L{layerLink}."""
609 self.__destination = destination
610 if not isinstance(update_instructions, list):
611 update_instructions = list(update_instructions)
612 self.__updateInstructions = update_instructions
613 self.__layerLink = layer_link
614
626
628 """Return the L{symbol<State.symbol>} of the L{consumingState}."""
629 return self.consumingState().symbol
630
661
662 - def apply (self, configuration, clone_map=None):
663 """Apply the transitition to a configuration.
664
665 This updates the configuration counter values based on the
666 update instructions, and sets the new configuration state.
667
668 @note: If the transition involves leaving a sub-automaton or
669 creating a new sub-automaton, the returned configuration
670 structure will be different from the one passed in. You
671 should invoke this as::
672
673 cfg = transition.apply(cfg)
674
675 @param configuration: A L{Configuration} of an executing automaton
676
677 @param clone_map: A map from L{Configuration} to
678 L{Configuration} reflecting the replacements made when the
679 configuration for which the transition was calculated was
680 subsequently cloned into the C{configuration} passed into this
681 method. This is only necessary when the transition includes
682 layer transitions.
683
684 @return: The resulting configuration
685 """
686 layer_link = self.__layerLink
687 if isinstance(layer_link, Configuration):
688 if clone_map is not None:
689 layer_link = clone_map[layer_link]
690 configuration = layer_link.leaveAutomaton(configuration)
691 elif isinstance(layer_link, Automaton):
692 configuration = configuration.enterAutomaton(layer_link)
693 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues())
694 configuration._set_state(self.destination, layer_link is None)
695 if self.__nextTransition is None:
696 return configuration
697 return self.__nextTransition.apply(configuration, clone_map)
698
699 - def chainTo (self, next_transition):
700 """Duplicate the state and chain the duplicate to a successor
701 transition.
702
703 This returns a new transition which applies the operation for
704 this transition, then proceeds to apply the next transition in
705 the chain.
706
707 @note: The node that is invoking this must not have successor
708 transitions.
709
710 @param next_transition: A L{Transition} node describing a
711 subsequent transition.
712
713 @return: a clone of this node, augmented with a link to
714 C{next_transition}."""
715 assert not self.__nextTransition
716 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink)
717 head.__nextTransition = next_transition
718 return head
719
730
736
743
745 return not self.__eq__(other)
746
761
763 """Base class for something that represents an L{Automaton} in
764 execution.
765
766 For deterministic automata, this is generally a L{Configuration}
767 which records the current automaton state along with its counter
768 values.
769
770 For non-deterministic automata, this is a L{MultiConfiguration}
771 which records a set of L{Configuration}s."""
772
774 """Return the acceptable L{Symbol}s given the current
775 configuration.
776
777 This method extracts the symbol from all candidate transitions
778 that are permitted based on the current counter values.
779 Because transitions are presented in a preferred order, the
780 symbols are as well."""
781 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
782
783 - def step (self, symbol):
784 """Execute an automaton transition using the given symbol.
785
786 @param symbol: A symbol from the alphabet of the automaton's
787 language. This is a Python value that should be accepted by
788 the L{SymbolMatch_mixin.match} method of a L{State.symbol}.
789 It is not a L{Symbol} instance.
790
791 @return: The new configuration resulting from the step.
792
793 @raises AutomatonStepError: L{UnrecognizedSymbolError}
794 when no transition compatible with C{symbol} is available, and
795 L{NondeterministicSymbolError} if C{symbol} admits multiple
796 transitions and the subclass does not support
797 non-deterministic steps (see L{MultiConfiguration}).
798
799 @warning: If the step entered or left a sub-automaton the
800 return value will not be the configuration that was used to
801 execute the step. The proper pattern for using this method
802 is::
803
804 cfg = cfg.step(sym)
805
806 """
807 raise NotImplementedError('%s.step' % (type(self).__name__,))
808
810 """The state of an L{Automaton} in execution.
811
812 This combines a state node of the automaton with a set of counter
813 values."""
814
815 __state = None
817 """The state of the configuration.
818
819 This is C{None} to indicate an initial state, or one of the underlying automaton's states."""
820 return self.__state
847 state = property(__get_state)
848
849 __counterValues = None
850 """The values of the counters.
851
852 This is a map from the CounterCondition instances of the
853 underlying automaton to integer values."""
856
857 __automaton = None
860 automaton = property(__get_automaton)
861
862 __subConfiguration = None
864 """Reference to configuration being executed in a sub-automaton.
865
866 C{None} if no sub-automaton is active, else a reference to a
867 configuration that is being executed in a sub-automaton.
868
869 Sub-configurations are used to match sub-terms in an
870 L{unordered catenation<All>} term. A configuration may have
871 at most one sub-configuration at a time, and the configuration
872 will be removed and possibly replaced when the term being
873 processed completes."""
874 return self.__subConfiguration
875 subConfiguration = property(__get_subConfiguration)
876
877 __superConfiguration = None
879 """Reference to the configuration for which this is a
880 sub-configuration.
881
882 C{None} if no super-automaton is active, else a reference to a
883 configuration that is being executed in a super-automaton.
884
885 The super-configuration relation persists for the lifetime of
886 the configuration."""
887 return self.__superConfiguration
888 superConfiguration = property(__get_superConfiguration)
889
890 __subAutomata = None
892 """A set of automata that must be satisfied before the current state can complete.
893
894 This is used in unordered catenation. Each sub-automaton
895 represents a term in the catenation. When the configuration
896 enters a state with sub-automata, a set containing references
897 to those automata is assigned to this attribute.
898 Subsequently, until all automata in the state are satisfied,
899 transitions can only occur within an active sub-automaton, out
900 of the active sub-automaton if it is in an accepting state,
901 and into a new sub-automaton if no sub-automaton is active.
902 """
903 return self.__subAutomata
906 subAutomata = property(__get_subAutomata)
907
909 """Create a transition back to the containing configuration.
910
911 This is done when a configuration is in an accepting state and
912 there are candidate transitions to other states that must be
913 considered. The transition does not consume a symbol."""
914 assert self.__superConfiguration is not None
915 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
916
918 """Execute steps to leave a sub-automaton.
919
920 @param sub_configuration: The configuration associated with
921 the automata that has completed.
922
923 @return: C{self}"""
924 assert sub_configuration.__superConfiguration == self
925 self.__subConfiguration = None
926 return self
927
945
948
955
957 """Return list of viable transitions on C{symbol}
958
959 The transitions that are structurally permitted from this
960 state, in order, filtering out those transitions where the
961 update instruction is not satisfied by the configuration
962 counter values and optionally those for which the symbol does
963 not match.
964
965 @param symbol: A symbol through which a transition from this
966 state is intended. A value of C{None} indicates that the set
967 of transitions should ignore the symbol; candidates are still
968 filtered based on the counter state of the configuration.
969
970 @return: A list of L{Transition} instances permitted from the
971 current configuration. If C{symbol} is not C{None},
972 transitions that would not accept the symbol are excluded.
973 Any transition that would require an unsatisfied counter
974 update is also excluded. Non-deterministic automata may
975 result in a lits with multiple members. """
976
977 fac = self.__automaton
978 transitions = []
979 if symbol is None:
980 match_filter = lambda _xit: True
981 else:
982 match_filter = lambda _xit: _xit.consumingState().match(symbol)
983 update_filter = lambda _xit: _xit.satisfiedBy(self)
984
985 if self.__state is None:
986
987 transitions.extend(fac.initialTransitions)
988 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting():
989
990
991 pass
992 else:
993
994
995 include_local = True
996 if self.__subAutomata:
997
998
999
1000 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata)
1001 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial))
1002 if include_local:
1003
1004 for xit in filter(update_filter, self.__state.transitionSet):
1005 if xit.consumingState() is not None:
1006 transitions.append(xit)
1007 else:
1008
1009
1010
1011
1012 (_, sub_initial) = xit.destination.subAutomataInitialTransitions()
1013 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial))
1014 if (self.__superConfiguration is not None) and self.isAccepting():
1015
1016 lxit = self.makeLeaveAutomatonTransition()
1017 supxit = self.__superConfiguration.candidateTransitions(symbol)
1018 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit))
1019 assert len(frozenset(transitions)) == len(transitions)
1020 return filter(update_filter, filter(match_filter, transitions))
1021
1024
1025 - def step (self, symbol):
1032
1034 """Return C{True} iff no transitions have ever been made."""
1035 return self.__state is None
1036
1051
1052 - def __init__ (self, automaton, super_configuration=None):
1056
1057 - def clone (self, clone_map=None):
1058 """Clone a configuration and its descendents.
1059
1060 This is used for parallel execution where a configuration has
1061 multiple candidate transitions and must follow all of them.
1062 It clones the entire chain of configurations through
1063 multiple layers.
1064
1065 @param clone_map: Optional map into which the translation from
1066 the original configuration object to the corresponding cloned
1067 configuration object can be reconstructed, e.g. when applying
1068 a transition that includes automata exits referencing
1069 superconfigurations from the original configuration.
1070 """
1071 if clone_map is None:
1072 clone_map = {}
1073 root = self
1074 while root.__superConfiguration is not None:
1075 root = root.__superConfiguration
1076 root = root._clone(clone_map, None)
1077 return clone_map.get(self)
1078
1079 - def _clone (self, clone_map, super_configuration):
1091
1094
1096 """Support parallel execution of state machine.
1097
1098 This holds a set of configurations, and executes each transition
1099 on each one. Configurations which fail to accept a step are
1100 silently dropped; only if this results in no remaining
1101 configurations will L{UnrecognizedSymbolError} be raised. If a
1102 step admits multiple valid transitions, a configuration is added
1103 for each one.
1104
1105 See L{pyxb.binding.content.AutomatonConfiguration} for an
1106 alternative solution which holds actions associated with the
1107 transition until the non-determinism is resolved."""
1108
1109 __configurations = None
1110
1113
1119
1120 - def step (self, symbol):
1121 next_configs = []
1122 for cfg in self.__configurations:
1123 transitions = cfg.candidateTransitions(symbol)
1124 if 0 == len(transitions):
1125 pass
1126 elif 1 == len(transitions):
1127 next_configs.append(transitions[0].apply(cfg))
1128 else:
1129 for transition in transitions:
1130 clone_map = {}
1131 ccfg = cfg.clone(clone_map)
1132 next_configs.append(transition.apply(ccfg, clone_map))
1133 if 0 == len(next_configs):
1134 raise UnrecognizedSymbolError(self, symbol)
1135 assert len(frozenset(next_configs)) == len(next_configs)
1136 self.__configurations = next_configs
1137 return self
1138
1140 """Return the set of configurations that are in an accepting state.
1141
1142 Note that some of the configurations may be within a
1143 sub-automaton; their presence in the return value is because
1144 the root configuration is also accepting."""
1145 accepting = []
1146 for cfg in self.__configurations:
1147 rcfg = cfg
1148
1149
1150 while rcfg.superConfiguration is not None:
1151 rcfg = rcfg.superConfiguration
1152 if rcfg.isAccepting():
1153 accepting.append(cfg)
1154 return accepting
1155
1157 """Representation of a Finite Automaton with Counters.
1158
1159 This has all the standard FAC elements, plus links to other
1160 states/automata as required to support the nested automata
1161 construct used for matching unordered catenation terms."""
1162 __states = None
1164 """The set of L{State}s in the automaton.
1165
1166 These correspond essentially to marked symbols in the original
1167 regular expression, or L{element
1168 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in
1169 an XML schema.
1170
1171 @note: These are conceptually a set and are stored that way.
1172 When an L{Automaton} is constructed the incoming states should
1173 be passed as a list so the calculated initial transitions are
1174 executed in a deterministic order."""
1175 return self.__states
1176 states = property(__get_states)
1177
1178 __counterConditions = None
1180 """The set of L{CounterCondition}s in the automaton.
1181
1182 These are marked positions in the regular expression, or
1183 L{particles<pyxb.xmlschema.structures.Particle>} in an XML
1184 schema, paired with their occurrence constraints."""
1185 return self.__counterConditions
1186 counterConditions = property(__get_counterConditions)
1187
1188 __nullable = None
1190 """C{True} iff the automaton accepts the empty string."""
1191 return self.__nullable
1192 nullable = property(__get_nullable)
1193
1194 __initialTransitions = None
1196 """The set of transitions that may be made to enter the automaton.
1197
1198 These are full transitions, including chains into subautomata
1199 if an initial state represents a node with sub-automata.
1200
1201 @note: As with L{State.transitionSet}, the set is represented
1202 as a list to preserve priority when resolving
1203 non-deterministic matches."""
1204 return self.__initialTransitions
1205 initialTransitions = property(__get_initialTransitions)
1206
1207 __containingState = None
1209 """The L{State} instance for which this is a sub-automaton.
1210
1211 C{None} if this is not a sub-automaton."""
1212 return self.__containingState
1213 containingState = property(__get_containingState)
1214
1215 __finalStates = None
1217 """The set of L{State} members which can terminate a match."""
1218 return self.__finalStates
1219 finalStates = property(__get_finalStates)
1220
1221 - def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1238
1240 """Return a new L{Configuration} instance for this automaton."""
1241 return Configuration(self)
1242
1258
1259 -class Node (object):
1260 """Abstract class for any node in the term tree.
1261
1262 In its original form a B{position} (C{pos}) is a tuple of
1263 non-negative integers comprising a path from a node in the term
1264 tree. It identifies a node in the tree. After the FAC has been
1265 constructed, only positions that are leaf nodes in the term tree
1266 remain, and the corresponding symbol value (Python instance) is
1267 used as the position.
1268
1269 An B{update instruction} (C{psi}) is a map from positions to
1270 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions
1271 to be taken on the counter states corresponding to the positions
1272 in its domain.
1273
1274 A B{transition} is a pair containing a position and an update
1275 instruction. It identifies a potential next node in the state and
1276 the updates that are to be performed if the transition is taken.
1277
1278 A B{follow value} is a map from a position to a set of transitions
1279 that may originate from the pos. This set is represented as a
1280 Python list since update instructions are dicts and cannot be
1281 hashed.
1282 """
1283
1284 _Precedence = None
1285 """An integral value used for parenthesizing expressions.
1286
1287 A subterm that has a precedence less than that of its containing
1288 term must be enclosed in parentheses when forming a text
1289 expression representing the containing term."""
1290
1291 RESET = False
1292 """An arbitrary value representing reset of a counter."""
1293
1294 INCREMENT = True
1295 """An arbitrary value representing increment of a counter."""
1296
1298 """Create a FAC term-tree node.
1299
1300 @keyword metadata: Any application-specific metadata retained in
1301 the term tree for transfer to the resulting automaton."""
1302 self.__metadata = kw.get('metadata')
1303
1304 - def clone (self, *args, **kw):
1305 """Create a deep copy of the node.
1306
1307 All term-tree--related attributes and properties are replaced
1308 with deep clones. Other attributes are preserved.
1309
1310 @param args: A tuple of arguments to be passed to the instance
1311 constructor.
1312
1313 @param kw: A dict of keywords to be passed to the instance
1314 constructor.
1315
1316 @note: Subclasses should pre-extend this method to augment the
1317 C{args} and C{kw} parameters as necessary to match the
1318 expectations of the C{__init__} method of the class being
1319 cloned."""
1320 kw.setdefault('metadata', self.metadata)
1321 return type(self)(*args, **kw)
1322
1323 __metadata = None
1327 metadata = property(__get_metadata)
1328
1329 __first = None
1331 """The I{first} set for the node.
1332
1333 This is the set of positions leading to symbols that can
1334 appear first in a string matched by an execution starting at
1335 the node."""
1336 if self.__first is None:
1337 self.__first = frozenset(self._first())
1338 return self.__first
1339 first = property(__get_first)
1340
1342 """Abstract method that defines L{first} for the subclass.
1343
1344 The return value should be an iterable of tuples of integers
1345 denoting paths from this node through the term tree to a
1346 symbol."""
1347 raise NotImplementedError('%s.first' % (type(self).__name__,))
1348
1349 __last = None
1351 """The I{last} set for the node.
1352
1353 This is the set of positions leading to symbols that can
1354 appear last in a string matched by an execution starting at
1355 the node."""
1356 if self.__last is None:
1357 self.__last = frozenset(self._last())
1358 return self.__last
1359 last = property(__get_last)
1360
1362 """Abstract method that defines L{last} for the subclass.
1363
1364 The return value should be an iterable of tuples of integers
1365 denoting paths from this node through the term tree to a
1366 symbol."""
1367 raise NotImplementedError('%s.last' % (type(self).__name__,))
1368
1369 __nullable = None
1375 nullable = property(__get_nullable)
1376
1378 """Abstract method that defines L{nullable} for the subclass.
1379
1380 The return value should be C{True} or C{False}."""
1381 raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1382
1383 __follow = None
1389 follow = property(__get_follow)
1390
1392 """Abstract method that defines L{follow} for the subclass.
1393
1394 The return value should be a map from tuples of integers (positions)
1395 to a list of transitions, where a transition is a position and
1396 an update instruction."""
1397 raise NotImplementedError('%s.follow' % (type(self).__name__,))
1398
1400 """Reset any term-tree state associated with the node.
1401
1402 Any change to the structure of the term tree in which the node
1403 appears invalidates memoized first/follow sets and related
1404 information. This method clears all that data so it can be
1405 recalculated. It does not clear the L{metadata} link, or any
1406 existing structural data."""
1407 self.__first = None
1408 self.__last = None
1409 self.__nullable = None
1410 self.__follow = None
1411 self.__counterPositions = None
1412
1414 """Utility function for term tree processing.
1415
1416 @param pre: a callable that, unless C{None}, is invoked at
1417 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where
1418 C{pos} is the tuple of integers identifying the path from the
1419 node at on which this method was invoked to the node being
1420 processed. The invocation occurs before processing any
1421 subordinate nodes.
1422
1423 @param post: as with C{pre} but invocation occurs after
1424 processing any subordinate nodes.
1425
1426 @param arg: a value passed to invocations of C{pre} and
1427 C{post}."""
1428 self._walkTermTree((), pre, post, arg)
1429
1431 """Abstract method implementing L{walkTermTree} for the subclass."""
1432 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1433
1434 __posNodeMap = None
1436 """A map from positions to nodes in the term tree."""
1437 if self.__posNodeMap is None:
1438 pnm = { }
1439 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm)
1440 self.__posNodeMap = pnm
1441 return self.__posNodeMap
1442 posNodeMap = property(__get_posNodeMap)
1443
1444 __nodePosMap = None
1453 nodePosMap = property(__get_nodePosMap)
1454
1455 @classmethod
1457 """Implement definition 11.1 in B{HOV09}."""
1458 return frozenset([ pos + _mp for _mp in pos_set ])
1459
1460 @classmethod
1462 """Implement definition 11.2 in B{HOV09}"""
1463 rv = {}
1464 for (q, v) in psi.iteritems():
1465 rv[pos + q] = v
1466 return rv
1467
1468 @classmethod
1470 """Implement definition 11.3 in B{HOV09}"""
1471 ts = []
1472 for (q, psi) in transition_set:
1473 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) ))
1474 return ts
1475
1481
1483
1484
1485
1486 self.walkTermTree(self.__resetAndValidate, None, set())
1487
1488 counter_map = { }
1489 for pos in self.counterPositions:
1490 nci = self.posNodeMap.get(pos)
1491 assert isinstance(nci, NumericalConstraint)
1492 assert nci not in counter_map
1493 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata)
1494 counters = counter_map.values()
1495
1496 state_map = { }
1497 for pos in self.follow.iterkeys():
1498 sym = self.posNodeMap.get(pos)
1499 assert isinstance(sym, LeafNode)
1500 assert sym not in state_map
1501
1502
1503
1504 is_initial = pos in self.first
1505
1506
1507
1508 final_update = None
1509 if (() == pos and sym.nullable) or (pos in self.last):
1510
1511
1512
1513 final_update = set()
1514 for nci in map(counter_map.get, self.counterSubPositions(pos)):
1515 final_update.add(UpdateInstruction(nci, False))
1516 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All))
1517 if isinstance(sym, All):
1518 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms))
1519 states = state_map.values()
1520
1521 for (spos, transition_set) in self.follow.iteritems():
1522 src = state_map[spos]
1523 phi = []
1524 for (dpos, psi) in transition_set:
1525 dst = state_map[dpos]
1526 uiset = set()
1527 for (counter, action) in psi.iteritems():
1528 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action))
1529 phi.append(Transition(dst, uiset))
1530 src._set_transitionSet(phi)
1531
1532 return Automaton(states, counters, self.nullable, containing_state=containing_state)
1533
1534 __counterPositions = None
1536 """Implement definition 13.1 from B{HOV09}.
1537
1538 The return value is the set of all positions leading to
1539 L{NumericalConstraint} nodes for which either the minimum
1540 value is not 1 or the maximum value is not unbounded."""
1541 if self.__counterPositions is None:
1542 cpos = []
1543 self.walkTermTree(lambda _n,_p,_a: \
1544 isinstance(_n, NumericalConstraint) \
1545 and ((1 != _n.min) \
1546 or (_n.max is not None)) \
1547 and _a.append(_p),
1548 None, cpos)
1549 self.__counterPositions = frozenset(cpos)
1550 return self.__counterPositions
1551 counterPositions = property(__get_counterPositions)
1552
1554 """Implement definition 13.2 from B{HOV09}.
1555
1556 This is the subset of L{counterPositions} that occur along the
1557 path to C{pos}."""
1558 rv = set()
1559 for cpos in self.counterPositions:
1560 if cpos == pos[:len(cpos)]:
1561 rv.add(cpos)
1562 return frozenset(rv)
1563
1565 """Obtain a description of the FAC in text format.
1566
1567 This is a diagnostic tool, returning first, last, and follow
1568 maps using positions."""
1569 rv = []
1570 rv.append('r\t= %s' % (str(self),))
1571 states = self.follow.keys()
1572 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states)))))
1573 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first))))
1574 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last))))
1575 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions))))
1576 for pos in self.first:
1577 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos)))
1578 for spos in states:
1579 for (dpos, transition_set) in self.follow[spos]:
1580 dst = self.posNodeMap[dpos]
1581 uv = []
1582 for (c, u) in transition_set.iteritems():
1583 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c)))
1584 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv)))
1585 return '\n'.join(rv)
1586
1588 """Intermediary for nodes that have multiple child nodes."""
1589
1590 __terms = None
1592 """The set of subordinate terms of the current node."""
1593 return self.__terms
1594 terms = property(__get_terms)
1595
1597 """Term that collects an ordered sequence of terms.
1598
1599 The terms are provided as arguments. All must be instances of
1600 a subclass of L{Node}."""
1601 super(MultiTermNode, self).__init__(**kw)
1602 self.__terms = terms
1603
1607
1609 if pre is not None:
1610 pre(self, position, arg)
1611 for c in xrange(len(self.__terms)):
1612 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg)
1613 if post is not None:
1614 post(self, position, arg)
1615
1617 """Intermediary for nodes that have no child nodes."""
1625 return { (): frozenset() }
1626
1628 if pre is not None:
1629 pre(self, position, arg)
1630 if post is not None:
1631 post(self, position, arg)
1632
1634 """A term with a numeric range constraint.
1635
1636 This corresponds to a "particle" in the XML Schema content model."""
1637
1638 _Precedence = -1
1639
1640 __min = None
1643 min = property(__get_min)
1644
1645 __max = None
1648 max = property(__get_max)
1649
1650 __term = None
1653 term = property(__get_term)
1654
1655 - def __init__ (self, term, min=0, max=1, **kw):
1656 """Term with a numerical constraint.
1657
1658 @param term: A term, the number of appearances of which is
1659 constrained in this term.
1660 @type term: L{Node}
1661
1662 @keyword min: The minimum number of occurrences of C{term}.
1663 The value must be non-negative.
1664
1665 @keyword max: The maximum number of occurrences of C{term}.
1666 The value must be positive (in which case it must also be no
1667 smaller than C{min}), or C{None} to indicate an unbounded
1668 number of occurrences."""
1669 super(NumericalConstraint, self).__init__(**kw)
1670 self.__term = term
1671 self.__min = min
1672 self.__max = max
1673
1676
1678 return [ (0,) + _fc for _fc in self.__term.first ]
1679
1681 return [ (0,) + _lc for _lc in self.__term.last ]
1682
1685
1704
1706 if pre is not None:
1707 pre(self, position, arg)
1708 self.__term._walkTermTree(position + (0,), pre, post, arg)
1709 if post is not None:
1710 post(self, position, arg)
1711
1720
1721 -class Choice (MultiTermNode):
1722 """A term that may be any one of a set of terms.
1723
1724 This term matches if any one of its contained terms matches."""
1725
1726 _Precedence = -3
1727
1729 """Term that selects one of a set of terms.
1730
1731 The terms are provided as arguments. All must be instances of
1732 a subclass of L{Node}."""
1733 super(Choice, self).__init__(*terms, **kw)
1734
1740
1746
1748 for t in self.terms:
1749 if t.nullable:
1750 return True
1751 return False
1752
1760
1769
1771 """A term that is an ordered sequence of terms."""
1772
1773 _Precedence = -2
1774
1776 """Term that collects an ordered sequence of terms.
1777
1778 The terms are provided as arguments. All must be instances of
1779 a subclass of L{Node}."""
1780 super(Sequence, self).__init__(*terms, **kw)
1781
1783 rv = set()
1784 c = 0
1785 while c < len(self.terms):
1786 t = self.terms[c]
1787 rv.update([ (c,) + _fc for _fc in t.first])
1788 if not t.nullable:
1789 break
1790 c += 1
1791 return rv
1792
1794 rv = set()
1795 c = len(self.terms) - 1;
1796 while 0 <= c:
1797 t = self.terms[c]
1798 rv.update([ (c,) + _lc for _lc in t.last])
1799 if not t.nullable:
1800 break
1801 c -= 1
1802 return rv
1803
1805 for t in self.terms:
1806 if not t.nullable:
1807 return False
1808 return True
1809
1811 rv = {}
1812 for c in xrange(len(self.terms)):
1813 pp = (c,)
1814 for (q, transition_set) in self.terms[c].follow.iteritems():
1815 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set)
1816 for c in xrange(len(self.terms)-1):
1817 t = self.terms[c]
1818 pp = (c,)
1819
1820
1821
1822 for q in t.last:
1823 psi = {}
1824 for p1 in t.counterSubPositions(q):
1825 psi[pp + p1] = self.RESET
1826 nc = c
1827 while nc+1 < len(self.terms):
1828 nc += 1
1829 nt = self.terms[nc]
1830 for sq1 in nt.first:
1831 q1 = (nc,) + sq1
1832 rv[pp+q].append((q1, psi))
1833 if not nt.nullable:
1834 break
1835 return rv
1836
1845
1846 -class All (MultiTermNode, LeafNode):
1847 """A term that is an unordered sequence of terms.
1848
1849 Note that the inheritance structure for this node is unusual. It
1850 has multiple children when it is treated as a term tree, but is
1851 considered a leaf node when constructing an automaton.
1852 """
1853
1854 _Precedence = 0
1855
1857 """Term that collects an unordered sequence of terms.
1858
1859 The terms are provided as arguments. All must be instances of
1860 a subclass of L{Node}."""
1861 super(All, self).__init__(*terms, **kw)
1862
1864 for t in self.terms:
1865 if not t.nullable:
1866 return False
1867 return True
1868
1869 @classmethod
1871 """Create a term tree that implements unordered catenation of
1872 the terms.
1873
1874 This expansion results in a standard choice/sequence term
1875 tree, at the cost of quadratic state expansion because terms
1876 are L{cloned<Node.clone>} as required to satisfy the tree
1877 requirements of the term tree.
1878
1879 @param terms: The tuple of terms that are elements of an
1880 accepted sequence.
1881
1882 @return: A term tree comprising a choice between sequences
1883 that connect each term to the unordered catenation of the
1884 remaining terms."""
1885 if 1 == len(terms):
1886 return terms[0]
1887 disjuncts = []
1888 for i in xrange(len(terms)):
1889 n = terms[i]
1890 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:])
1891 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem)))
1892 return Choice(*disjuncts)
1893
1895 return u'&(' + ','.join([str(_t) for _t in self.terms]) + ')'
1896
1898 """A leaf term that is a symbol.
1899
1900 The symbol is represented by the L{metadata} field."""
1901
1902 _Precedence = 0
1903
1907
1910
1913