Package pyxb :: Package utils :: Module fac
[hide private]
[frames] | no frames]

Source Code for Module pyxb.utils.fac

   1  # -*- coding: utf-8 -*- 
   2  # Copyright 2012, Peter A. Bigot 
   3  # 
   4  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
   5  # not use this file except in compliance with the License. You may obtain a 
   6  # copy of the License at: 
   7  # 
   8  #            http://www.apache.org/licenses/LICENSE-2.0 
   9  # 
  10  # Unless required by applicable law or agreed to in writing, software 
  11  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
  12  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
  13  # License for the specific language governing permissions and limitations 
  14  # under the License. 
  15   
  16  """This module provides Finite Automata with Counters. 
  17   
  18  FACs are type of state machine where a transition may include a 
  19  constraint and a modification to a set of counters.  They are used to 
  20  implement regular expressions with numerical constraints, as are found 
  21  in POSIX regexp, Perl, and XML schema. 
  22   
  23  The implementation here derives from U{Regular Expressions with 
  24  Numerical Constraints and Automata with Counters 
  25  <https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>}, 
  26  Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684, 
  27  Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245.  In what 
  28  follows, this reference will be denoted B{HOV09}. 
  29    
  30  A regular expression is directly translated into a term tree, where 
  31  nodes are operators such as sequence, choice, and counter 
  32  restrictions, and the leaf nodes denote symbols in the language of the 
  33  regular expression. 
  34   
  35  In the case of XML content models, the symbols include L{element 
  36  declarations <pyxb.xmlschema.structures.ElementDeclaration>} and 
  37  L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}.  A 
  38  numerical constraint node corresponds to an L{XML particle 
  39  <pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes 
  40  derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of 
  41  types B{choice} and B{sequence}.  As suggested in U{The Membership 
  42  Problem for Regular Expressions with Unordered Concatenation and 
  43  Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>} 
  44  the B{all} content model can be translated into state machine using 
  45  choice and sequence at the cost of a quadratic size explosion.  Since 
  46  some XML content models might have a hundred terms in an unordered 
  47  catenation, this is not acceptable, and the implementation here 
  48  optimizes this construct by creating a leaf node in the automaton 
  49  which in turn contains sub-automata for each term, and permits an exit 
  50  transition only when all the terms that are required have been 
  51  completed. 
  52   
  53  @note: In XSD 1.1 the restriction that terms in an B{all} model group 
  54  occur at most once has been removed.  Since the current implementation 
  55  removes a completed term from the set of available terms, this will 
  56  not work: instead the subconfiguration with its counter values must be 
  57  retained between matches. 
  58  """ 
  59   
  60  import operator 
  61  import logging 
  62   
  63  log_ = logging.getLogger(__name__) 
64 65 -class FACError (Exception):
66 pass
67
68 -class InvalidTermTreeError (FACError):
69 """Exception raised when a FAC term tree is not a tree. 70 71 For example, a L{Symbol} node appears multiple times, or a cycle is detected.""" 72 73 parent = None 74 """The L{MultiTermNode} containing the term that proves invalidity""" 75 76 term = None 77 """The L{Node} that proves invalidity""" 78
79 - def __init__ (self, *args):
80 (self.parent, self.term) = args 81 super(InvalidTermTreeError, self).__init__(*args)
82
83 -class UpdateApplicationError (FACError):
84 """Exception raised when an unsatisfied update instruction is executed. 85 86 This indicates an internal error in the implementation.""" 87 88 update_instruction = None 89 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}""" 90 91 values = None 92 """The unsatisfying value map from L{CounterCondition} instances to integers""" 93
94 - def __init__ (self, *args):
95 (self.update_instruction, self.values) = args 96 super(UpdateApplicationError, self).__init__(*args)
97
98 -class AutomatonStepError (Exception):
99 """Symbol rejected by L{Configuration_ABC.step}. 100 101 The exception indicates that the proposed symbol either failed to 102 produce a transition (L{UnrecognizedSymbolError}) or produced 103 multiple equally valid transitions 104 (L{NondeterministicSymbolError}).""" 105 106 configuration = None 107 """The instance of L{Configuration_ABC} that raised the exception. 108 From L{Configuration_ABC.acceptableSymbols} you can determine what 109 alternatives might be present.""" 110 111 symbol = None 112 """The symbol that was not accepted.""" 113
114 - def __get_acceptable (self):
115 """A list of symbols that the configuration would accept in its current state.""" 116 return self.configuration.acceptableSymbols()
117 acceptable = property(__get_acceptable) 118
119 - def __init__ (self, *args):
120 (self.configuration, self.symbol) = args 121 super(AutomatonStepError, self).__init__(*args)
122
123 -class UnrecognizedSymbolError (AutomatonStepError):
124 """L{Configuration.step} failed to find a valid transition.""" 125 pass
126
127 -class NondeterministicSymbolError (AutomatonStepError):
128 """L{Configuration.step} found multiple transitions.""" 129 pass
130
131 -class SymbolMatch_mixin (object):
132 """Mix-in used by symbols to provide a custom match implementation. 133 134 If a L{State.symbol} value is an instance of this mix-in, then it 135 will be used to validate a candidate symbol for a match.""" 136
137 - def match (self, symbol):
138 raise NotImplementedError('%s.match' % (type(self).__name__,))
139
140 -class State (object):
141 """A thin wrapper around an object reference. 142 143 The state of the automaton corresponds to a position, or marked 144 symbol, in the term tree. Because the same symbol may appear at 145 multiple locations in the tree, and the distinction between these 146 positions is critical, a L{State} wrapper is provided to maintain 147 distinct values.""" 148
149 - def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
150 """Create a FAC state. 151 152 @param symbol: The symbol associated with the state. 153 Normally initialized from the L{Symbol.metadata} value. The 154 state may be entered if, among other conditions, the L{match} 155 routine accepts the proposed input as being consistent with 156 this value. 157 158 @param is_initial: C{True} iff this state may serve as the 159 first state of the automaton. 160 161 @param final_update: C{None} if this state is not an 162 accepting state of the automaton; otherwise a set of 163 L{UpdateInstruction} values that must be satisfied by the 164 counter values in a configuration as a further restriction of 165 acceptance. 166 167 @param is_unordered_catenation: C{True} if this state has 168 subautomata that must be matched to execute the unordered 169 catenation of an L{All} node; C{False} if this is a regular 170 symbol.""" 171 self.__symbol = symbol 172 self.__isInitial = not not is_initial 173 self.__finalUpdate = final_update 174 self.__isUnorderedCatenation = is_unordered_catenation
175 176 __automaton = None
177 - def __get_automaton (self):
178 """Link to the L{Automaton} to which the state belongs.""" 179 return self.__automaton
180 - def _set_automaton (self, automaton):
181 """Method invoked during automaton construction to set state owner.""" 182 assert self.__automaton is None 183 self.__automaton = automaton 184 return self
185 automaton = property(__get_automaton) 186 187 __symbol = None
188 - def __get_symbol (self):
189 """Application-specific metadata identifying the symbol. 190 191 See also L{match}.""" 192 return self.__symbol
193 symbol = property(__get_symbol) 194 195 __isUnorderedCatenation = None
196 - def __get_isUnorderedCatenation (self):
197 """Indicate whether the state has subautomata for unordered 198 catenation. 199 200 To reduce state explosion due to non-determinism, such a state 201 executes internal transitions in subautomata until all terms 202 have matched or a failure is discovered.""" 203 return self.__isUnorderedCatenation
204 isUnorderedCatenation = property(__get_isUnorderedCatenation) 205 206 __subAutomata = None
207 - def __get_subAutomata (self):
208 """A sequence of sub-automata supporting internal state transitions. 209 210 This will return C{None} unless L{isUnorderedCatenation} is C{True}.""" 211 return self.__subAutomata
212 - def _set_subAutomata (self, *automata):
213 assert self.__subAutomata is None 214 assert self.__isUnorderedCatenation 215 self.__subAutomata = automata
216 subAutomata = property(__get_subAutomata) 217 218 __isInitial = None
219 - def __get_isInitial (self):
220 """C{True} iff this state may be the first state the automaton enters.""" 221 return self.__isInitial
222 isInitial = property(__get_isInitial) 223 224 __automatonEntryTransitions = None
226 """Return the set of initial transitions allowing entry to the automata through this state. 227 228 These are structurally-permitted transitions only, and must be 229 filtered based on the symbol that might trigger the 230 transition. The results are not filtered based on counter 231 value, since this value is used to determine how the 232 containing automaton might be entered. Consequently the 233 return value is the empty set unless this is an initial state. 234 235 The returned set is closed under entry to sub-automata, 236 i.e. it is guaranteed that each transition includes a 237 consuming state even if it requires a multi-element chain of 238 transitions into subautomata to reach one.""" 239 if self.__automatonEntryTransitions is None: 240 transitions = [] 241 if self.__isInitial: 242 xit = Transition(self, set()) 243 if self.__subAutomata is None: 244 transitions.append(xit) 245 else: 246 for sa in self.__subAutomata: 247 for saxit in sa.initialTransitions: 248 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition())) 249 self.__automatonEntryTransitions = transitions 250 return self.__automatonEntryTransitions
251 automatonEntryTransitions = property(__get_automatonEntryTransitions) 252 253 __finalUpdate = None
254 - def __get_finalUpdate (self):
255 """Return the update instructions that must be satisfied for this to be a final state.""" 256 return self.__finalUpdate
257 finalUpdate = property(__get_finalUpdate) 258
259 - def subAutomataInitialTransitions (self, sub_automata=None):
260 """Return the set of candidate transitions to enter a sub-automaton of this state. 261 262 @param sub_automata: A subset of the sub-automata of this 263 state which should contribute to the result. If C{None}, all 264 sub-automata are used. 265 266 @return: A pair C{(nullable, transitions)} where C{nullable} 267 is C{True} iff there is at least one sub-automaton that is in 268 an accepting state on entry, and C{transitions} is a list of 269 L{Transition} instances describing how to reach some state in 270 a sub-automaton via a consumed symbol. 271 """ 272 assert self.__subAutomata is not None 273 is_nullable = True 274 transitions = [] 275 if sub_automata is None: 276 sub_automata = self.__subAutomata 277 for sa in sub_automata: 278 if not sa.nullable: 279 is_nullable = False 280 transitions.extend(sa.initialTransitions) 281 return (is_nullable, transitions)
282
283 - def isAccepting (self, counter_values):
284 """C{True} iff this state is an accepting state for the automaton. 285 286 @param counter_values: Counter values that further validate 287 whether the requirements of the automaton have been met. 288 289 @return: C{True} if this is an accepting state and the 290 counter values relevant at it are satisfied.""" 291 if self.__finalUpdate is None: 292 return False 293 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
294 295 __transitionSet = None
296 - def __get_transitionSet (self):
297 """Definitions of viable transitions from this state. 298 299 The transition set of a state is a set of L{Transition} nodes 300 identifying a state reachable in a single step from this 301 state, and a set of counter updates that must apply if the 302 transition is taken. 303 304 These transitions may not in themselves consume a symbol. For 305 example, if the destination state represents a match of an 306 L{unordered catenation of terms<All>}, then secondary 307 processing must be done to traverse into the automata for 308 those terms and identify transitions that include a symbol 309 consumption. 310 311 @note: Although conceptually the viable transitions are a set, 312 this implementation maintains them in a list so that order is 313 preserved when automata processing becomes non-deterministic. 314 PyXB is careful to build the transition list so that the 315 states are attempted in the order in which they appear in the 316 schema that define the automata. 317 """ 318 return self.__transitionSet
319 transitionSet = property(__get_transitionSet) 320
321 - def _set_transitionSet (self, transition_set):
322 """Method invoked during automaton construction to set the 323 legal transitions from the state. 324 325 The set of transitions cannot be defined until all states that 326 appear in it are available, so the creation of the automaton 327 requires that the association of the transition set be 328 delayed. (Though described as a set, the transitions are a 329 list where order reflects priority.) 330 331 @param transition_set: a list of pairs where the first 332 member is the destination L{State} and the second member is the 333 set of L{UpdateInstruction}s that apply when the automaton 334 transitions to the destination state.""" 335 336 self.__transitionSet = [] 337 seen = set() 338 for xit in transition_set: 339 if not (xit in seen): 340 seen.add(xit) 341 self.__transitionSet.append(xit)
342
343 - def match (self, symbol):
344 """Return C{True} iff the symbol matches for this state. 345 346 This may be overridden by subclasses when matching by 347 equivalence does not work. Alternatively, if the symbol 348 stored in this node is a subclass of L{SymbolMatch_mixin}, then 349 its match method will be used. Otherwise C{symbol} matches 350 only if it is equal to the L{symbol} of this state. 351 352 @param symbol: A candidate symbol corresponding to the 353 expression symbol for this state. 354 355 @return: C{True} iff C{symbol} is a match for this state. 356 """ 357 if isinstance(self.__symbol, SymbolMatch_mixin): 358 return self.__symbol.match(symbol) 359 return self.__symbol == symbol
360
361 - def __str__ (self):
362 return 'S.%x' % (id(self),)
363
364 - def _facText (self):
365 rv = [] 366 rv.extend(map(str, self.__transitionSet)) 367 if self.__finalUpdate is not None: 368 if 0 == len(self.__finalUpdate): 369 rv.append('Final (no conditions)') 370 else: 371 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate)))) 372 return '\n'.join(rv)
373
374 -class CounterCondition (object):
375 """A counter condition is a range limit on valid counter values. 376 377 Instances of this class serve as keys for the counters that 378 represent the configuration of a FAC. The instance also maintains 379 a pointer to application-specific L{metadata}.""" 380 381 __min = None
382 - def __get_min (self):
383 """The minimum legal value for the counter. 384 385 This is a non-negative integer.""" 386 return self.__min
387 min = property(__get_min) 388 389 __max = None
390 - def __get_max (self):
391 """The maximum legal value for the counter. 392 393 This is a positive integer, or C{None} to indicate that the 394 counter is unbounded.""" 395 return self.__max
396 max = property(__get_max) 397 398 __metadata = None
399 - def __get_metadata (self):
400 """A pointer to application metadata provided when the condition was created.""" 401 return self.__metadata
402 metadata = property(__get_metadata) 403
404 - def __init__ (self, min, max, metadata=None):
405 """Create a counter condition. 406 407 @param min: The value for L{min} 408 @param max: The value for L{max} 409 @param metadata: The value for L{metadata} 410 """ 411 self.__min = min 412 self.__max = max 413 self.__metadata = metadata
414
415 - def __hash__ (self):
416 return hash(self.__min) ^ hash(self.__max) ^ hash(self.__metadata)
417
418 - def __eq__ (self, other):
419 return (other is not None) \ 420 and (self.__min == other.__min) \ 421 and (self.__max == other.__max) \ 422 and (self.__metadata == other.__metadata)
423
424 - def __ne__ (self, other):
425 return not self.__eq__(other)
426
427 - def __str__ (self):
428 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
429
430 -class UpdateInstruction:
431 """An update instruction pairs a counter with a mutation of that 432 counter. 433 434 The instruction is executed during a transition from one state to 435 another, and causes the corresponding counter to be incremented or 436 reset. The instruction may only be applied if doing so does not 437 violate the conditions of the counter it affects.""" 438 439 __counterCondition = None
440 - def __get_counterCondition (self):
441 """A reference to the L{CounterCondition} identifying the 442 counter to be updated. 443 444 The counter condition instance is used as a key to the 445 dictionary maintaining current counter values.""" 446 return self.__counterCondition
447 counterCondition = property(__get_counterCondition) 448 449 __doIncrement = None
450 - def __get_doIncrement (self):
451 """C{True} if the counter is to be incremented; C{False} if it is to be reset.""" 452 return self.__doIncrement
453 doIncrement = property(__get_doIncrement) 454 455 # Cached values extracted from counter condition 456 __min = None 457 __max = None 458
459 - def __init__ (self, counter_condition, do_increment):
460 """Create an update instruction. 461 462 @param counter_condition: A L{CounterCondition} identifying a 463 minimum and maximum value for a counter, and serving as a map 464 key for the value of the corresponding counter. 465 466 @param do_increment: C{True} if the update is to increment 467 the value of the counter; C{False} if the update is to reset 468 the counter. 469 """ 470 self.__counterCondition = counter_condition 471 self.__doIncrement = not not do_increment 472 self.__min = counter_condition.min 473 self.__max = counter_condition.max
474
475 - def satisfiedBy (self, counter_values):
476 """Implement a component of definition 5 from B{HOV09}. 477 478 The update instruction is satisfied by the counter values if 479 its action may be legitimately applied to the value of its 480 associated counter. 481 482 @param counter_values: A map from L{CounterCondition}s to 483 non-negative integers 484 485 @return: C{True} or C{False} 486 """ 487 value = counter_values[self.__counterCondition] 488 if self.__doIncrement \ 489 and (self.__max is not None) \ 490 and (value >= self.__max): 491 return False 492 if (not self.__doIncrement) \ 493 and (value < self.__min): 494 return False 495 return True
496 497 @classmethod
498 - def Satisfies (cls, counter_values, update_instructions):
499 """Return C{True} iff the counter values satisfy the update 500 instructions. 501 502 @param counter_values: A map from L{CounterCondition} to 503 integer counter values 504 505 @param update_instructions: A set of L{UpdateInstruction} 506 instances 507 508 @return: C{True} iff all instructions are satisfied by the 509 values and limits.""" 510 for psi in update_instructions: 511 if not psi.satisfiedBy(counter_values): 512 return False 513 return True
514
515 - def apply (self, counter_values):
516 """Apply the update instruction to the provided counter values. 517 518 @param counter_values: A map from L{CounterCondition} to 519 integer counter values. This map is updated in-place.""" 520 if not self.satisfiedBy(counter_values): 521 raise UpdateApplicationError(self, counter_values) 522 value = counter_values[self.__counterCondition] 523 if self.__doIncrement: 524 value += 1 525 else: 526 value = 1 527 counter_values[self.__counterCondition] = value
528 529 @classmethod
530 - def Apply (cls, update_instructions, counter_values):
531 """Apply the update instructions to the counter values. 532 533 @param update_instructions: A set of L{UpdateInstruction} 534 instances. 535 536 @param counter_values: A map from L{CounterCondition} 537 instances to non-negative integers. This map is updated 538 in-place by applying each instruction in 539 C{update_instructions}.""" 540 for psi in update_instructions: 541 psi.apply(counter_values)
542
543 - def __hash__ (self):
544 return hash(self.__counterCondition) ^ hash(self.__doIncrement)
545
546 - def __eq__ (self, other):
547 return (other is not None) \ 548 and (self.__doIncrement == other.__doIncrement) \ 549 and (self.__counterCondition == other.__counterCondition)
550
551 - def __ne__ (self, other):
552 return not self.__eq__(other)
553
554 - def __str__ (self):
555 return '%s %s' % (self.__doIncrement and 'inc' or 'reset', self.__counterCondition)
556
557 -class Transition (object):
558 """Representation of a FAC state transition.""" 559 560 __destination = None
561 - def __get_destination (self):
562 """The transition destination state.""" 563 return self.__destination
564 destination = property(__get_destination) 565 566 __updateInstructions = None
567 - def __get_updateInstructions (self):
568 """The set of counter updates that are applied when the transition is taken.""" 569 return self.__updateInstructions
570 updateInstructions = property(__get_updateInstructions) 571 572 __nextTransition = None
573 - def __get_nextTransition (self):
574 """The next transition to apply in this chain. 575 576 C{None} if this is the last transition in the chain.""" 577 return self.__nextTransition
578 nextTransition = property(__get_nextTransition) 579 580 __layerLink = None 596 layerLink = property(__get_layerLink) 597
598 - def __init__ (self, destination, update_instructions, layer_link=None):
599 """Create a transition to a state. 600 601 @param destination: the state into which the transition is 602 made 603 604 @param update_instructions: A iterable of L{UpdateInstruction}s 605 denoting the changes that must be made to counters as a 606 consequence of taking the transition. 607 608 @keyword layer_link: The value for L{layerLink}.""" 609 self.__destination = destination 610 if not isinstance(update_instructions, list): 611 update_instructions = list(update_instructions) 612 self.__updateInstructions = update_instructions 613 self.__layerLink = layer_link
614
615 - def consumingState (self):
616 """Return the state in this transition chain that must match a symbol.""" 617 618 # Transitions to a state with subautomata never consume anything 619 if self.__destination.subAutomata is not None: 620 if not self.__nextTransition: 621 return None 622 return self.__nextTransition.consumingState() 623 # I don't think there should be subsequent transitions 624 assert self.__nextTransition is None 625 return self.__destination
626
627 - def consumedSymbol (self):
628 """Return the L{symbol<State.symbol>} of the L{consumingState}.""" 629 return self.consumingState().symbol
630
631 - def satisfiedBy (self, configuration):
632 """Check the transition update instructions against 633 configuration counter values. 634 635 This implementation follows layer changes, updating the 636 configuration used as counter value source as necessary. 637 638 @param configuration: A L{Configuration} instance containing 639 counter data against which update instruction satisfaction is 640 checked. 641 642 @return: C{True} iff all update instructions along the 643 transition chain are satisfied by their relevant 644 configuration.""" 645 # If we're entering an automaton, we know no subsequent 646 # transitions have update instructions 647 if isinstance(self.__layerLink, Automaton): 648 return True 649 # If we're leaving an automaton, switch to the configuration 650 # that is relevant to the destination of the transition. 651 if isinstance(self.__layerLink, Configuration): 652 configuration = self.__layerLink 653 assert self.destination.automaton == configuration.automaton 654 # Blow chunks if the configuration doesn't satisfy the transition 655 if not configuration.satisfies(self): 656 return False 657 # Otherwise try the next transition, or succeed if there isn't one 658 if self.__nextTransition: 659 return self.__nextTransition.satisfiedBy(configuration) 660 return True
661
662 - def apply (self, configuration, clone_map=None):
663 """Apply the transitition to a configuration. 664 665 This updates the configuration counter values based on the 666 update instructions, and sets the new configuration state. 667 668 @note: If the transition involves leaving a sub-automaton or 669 creating a new sub-automaton, the returned configuration 670 structure will be different from the one passed in. You 671 should invoke this as:: 672 673 cfg = transition.apply(cfg) 674 675 @param configuration: A L{Configuration} of an executing automaton 676 677 @param clone_map: A map from L{Configuration} to 678 L{Configuration} reflecting the replacements made when the 679 configuration for which the transition was calculated was 680 subsequently cloned into the C{configuration} passed into this 681 method. This is only necessary when the transition includes 682 layer transitions. 683 684 @return: The resulting configuration 685 """ 686 layer_link = self.__layerLink 687 if isinstance(layer_link, Configuration): 688 if clone_map is not None: 689 layer_link = clone_map[layer_link] 690 configuration = layer_link.leaveAutomaton(configuration) 691 elif isinstance(layer_link, Automaton): 692 configuration = configuration.enterAutomaton(layer_link) 693 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues()) 694 configuration._set_state(self.destination, layer_link is None) 695 if self.__nextTransition is None: 696 return configuration 697 return self.__nextTransition.apply(configuration, clone_map)
698
699 - def chainTo (self, next_transition):
700 """Duplicate the state and chain the duplicate to a successor 701 transition. 702 703 This returns a new transition which applies the operation for 704 this transition, then proceeds to apply the next transition in 705 the chain. 706 707 @note: The node that is invoking this must not have successor 708 transitions. 709 710 @param next_transition: A L{Transition} node describing a 711 subsequent transition. 712 713 @return: a clone of this node, augmented with a link to 714 C{next_transition}.""" 715 assert not self.__nextTransition 716 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink) 717 head.__nextTransition = next_transition 718 return head
719
721 """Replicate the transition as a layer link into its automaton. 722 723 This is used on initial transitions into sub-automata where a 724 sub-configuration must be created and recorded.""" 725 assert self.__layerLink is None 726 assert self.__nextTransition is None 727 head = type(self)(self.__destination, self.__updateInstructions) 728 head.__layerLink = self.__destination.automaton 729 return head
730
731 - def __hash__ (self):
732 rv = hash(self.__destination) 733 for ui in self.__updateInstructions: 734 rv ^= hash(ui) 735 return rv ^ hash(self.__nextTransition) ^ hash(self.__layerLink)
736
737 - def __eq__ (self, other):
738 return (other is not None) \ 739 and (self.__destination == other.__destination) \ 740 and (self.__updateInstructions == other.__updateInstructions) \ 741 and (self.__nextTransition == other.__nextTransition) \ 742 and (self.__layerLink == other.__layerLink)
743
744 - def __ne__ (self, other):
745 return not self.__eq__(other)
746
747 - def __str__ (self):
748 rv = [] 749 if isinstance(self.__layerLink, Configuration): 750 rv.append('from A%x ' % (id(self.__layerLink.automaton),)) 751 elif isinstance(self.__layerLink, Automaton): 752 rv.append('in A%x ' % (id(self.__layerLink))) 753 rv.append('enter %s ' % (self.destination,)) 754 if (self.consumingState() == self.destination): 755 rv.append('via %s ' % (self.destination.symbol,)) 756 rv.append('with %s' % (' ; '.join(map(str, self.updateInstructions)),)) 757 if self.__nextTransition: 758 rv.append("\n\tthen ") 759 rv.append(str(self.__nextTransition)) 760 return ''.join(rv)
761
762 -class Configuration_ABC (object):
763 """Base class for something that represents an L{Automaton} in 764 execution. 765 766 For deterministic automata, this is generally a L{Configuration} 767 which records the current automaton state along with its counter 768 values. 769 770 For non-deterministic automata, this is a L{MultiConfiguration} 771 which records a set of L{Configuration}s.""" 772
773 - def acceptableSymbols (self):
774 """Return the acceptable L{Symbol}s given the current 775 configuration. 776 777 This method extracts the symbol from all candidate transitions 778 that are permitted based on the current counter values. 779 Because transitions are presented in a preferred order, the 780 symbols are as well.""" 781 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
782
783 - def step (self, symbol):
784 """Execute an automaton transition using the given symbol. 785 786 @param symbol: A symbol from the alphabet of the automaton's 787 language. This is a Python value that should be accepted by 788 the L{SymbolMatch_mixin.match} method of a L{State.symbol}. 789 It is not a L{Symbol} instance. 790 791 @return: The new configuration resulting from the step. 792 793 @raises AutomatonStepError: L{UnrecognizedSymbolError} 794 when no transition compatible with C{symbol} is available, and 795 L{NondeterministicSymbolError} if C{symbol} admits multiple 796 transitions and the subclass does not support 797 non-deterministic steps (see L{MultiConfiguration}). 798 799 @warning: If the step entered or left a sub-automaton the 800 return value will not be the configuration that was used to 801 execute the step. The proper pattern for using this method 802 is:: 803 804 cfg = cfg.step(sym) 805 806 """ 807 raise NotImplementedError('%s.step' % (type(self).__name__,))
808
809 -class Configuration (Configuration_ABC):
810 """The state of an L{Automaton} in execution. 811 812 This combines a state node of the automaton with a set of counter 813 values.""" 814 815 __state = None
816 - def __get_state (self):
817 """The state of the configuration. 818 819 This is C{None} to indicate an initial state, or one of the underlying automaton's states.""" 820 return self.__state
821 - def _set_state (self, state, is_layer_change):
822 """Internal state transition interface. 823 824 @param state: the new destination state 825 826 @param is_layer_change: C{True} iff the transition inducing 827 the state change involves a layer change. 828 """ 829 830 # If the new state and old state are the same, the layer 831 # change has no effect (we're probably leaving a 832 # subconfiguration, and we want to keep the current set of 833 # sub-automata.) 834 if state == self.__state: 835 return 836 837 # Otherwise, discard any unprocessed automata in the former 838 # state, set the state, and if the new state has subautomata 839 # create a set holding them so they can be processed. 840 if is_layer_change: 841 self.__subConfiguration = None 842 self.__subAutomata = None 843 self.__state = state 844 if is_layer_change and (state.subAutomata is not None): 845 assert self.__subAutomata is None 846 self.__subAutomata = set(state.subAutomata)
847 state = property(__get_state) 848 849 __counterValues = None 850 """The values of the counters. 851 852 This is a map from the CounterCondition instances of the 853 underlying automaton to integer values."""
854 - def _get_counterValues (self):
855 return self.__counterValues
856 857 __automaton = None
858 - def __get_automaton (self):
859 return self.__automaton
860 automaton = property(__get_automaton) 861 862 __subConfiguration = None
863 - def __get_subConfiguration (self):
864 """Reference to configuration being executed in a sub-automaton. 865 866 C{None} if no sub-automaton is active, else a reference to a 867 configuration that is being executed in a sub-automaton. 868 869 Sub-configurations are used to match sub-terms in an 870 L{unordered catenation<All>} term. A configuration may have 871 at most one sub-configuration at a time, and the configuration 872 will be removed and possibly replaced when the term being 873 processed completes.""" 874 return self.__subConfiguration
875 subConfiguration = property(__get_subConfiguration) 876 877 __superConfiguration = None
878 - def __get_superConfiguration (self):
879 """Reference to the configuration for which this is a 880 sub-configuration. 881 882 C{None} if no super-automaton is active, else a reference to a 883 configuration that is being executed in a super-automaton. 884 885 The super-configuration relation persists for the lifetime of 886 the configuration.""" 887 return self.__superConfiguration
888 superConfiguration = property(__get_superConfiguration) 889 890 __subAutomata = None
891 - def __get_subAutomata (self):
892 """A set of automata that must be satisfied before the current state can complete. 893 894 This is used in unordered catenation. Each sub-automaton 895 represents a term in the catenation. When the configuration 896 enters a state with sub-automata, a set containing references 897 to those automata is assigned to this attribute. 898 Subsequently, until all automata in the state are satisfied, 899 transitions can only occur within an active sub-automaton, out 900 of the active sub-automaton if it is in an accepting state, 901 and into a new sub-automaton if no sub-automaton is active. 902 """ 903 return self.__subAutomata
904 - def _set_subAutomata (self, automata):
905 self.__subAutomata = set(automata)
906 subAutomata = property(__get_subAutomata) 907
909 """Create a transition back to the containing configuration. 910 911 This is done when a configuration is in an accepting state and 912 there are candidate transitions to other states that must be 913 considered. The transition does not consume a symbol.""" 914 assert self.__superConfiguration is not None 915 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
916
917 - def leaveAutomaton (self, sub_configuration):
918 """Execute steps to leave a sub-automaton. 919 920 @param sub_configuration: The configuration associated with 921 the automata that has completed. 922 923 @return: C{self}""" 924 assert sub_configuration.__superConfiguration == self 925 self.__subConfiguration = None 926 return self
927
928 - def enterAutomaton (self, automaton):
929 """Execute steps to enter a new automaton. 930 931 The new automaton is removed from the set of remaining 932 automata for the current state, and a new configuration 933 created. No transition is made in that new configuration. 934 935 @param automaton: The automaton to be entered 936 937 @return: The configuration that executes the new automaton as 938 a sub-configuration of C{self}.""" 939 assert self.__subConfiguration is None 940 assert self.__subAutomata is not None 941 self.__subAutomata.remove(automaton) 942 self.__subConfiguration = Configuration(automaton) 943 self.__subConfiguration.__superConfiguration = self 944 return self.__subConfiguration
945
946 - def satisfies (self, transition):
948
949 - def reset (self):
950 fac = self.__automaton 951 self.__state = None 952 self.__counterValues = dict(zip(fac.counterConditions, len(fac.counterConditions) * (1,))) 953 self.__subConfiguration = None 954 self.__subAutomata = None
955
956 - def candidateTransitions (self, symbol=None):
957 """Return list of viable transitions on C{symbol} 958 959 The transitions that are structurally permitted from this 960 state, in order, filtering out those transitions where the 961 update instruction is not satisfied by the configuration 962 counter values and optionally those for which the symbol does 963 not match. 964 965 @param symbol: A symbol through which a transition from this 966 state is intended. A value of C{None} indicates that the set 967 of transitions should ignore the symbol; candidates are still 968 filtered based on the counter state of the configuration. 969 970 @return: A list of L{Transition} instances permitted from the 971 current configuration. If C{symbol} is not C{None}, 972 transitions that would not accept the symbol are excluded. 973 Any transition that would require an unsatisfied counter 974 update is also excluded. Non-deterministic automata may 975 result in a lits with multiple members. """ 976 977 fac = self.__automaton 978 transitions = [] 979 if symbol is None: 980 match_filter = lambda _xit: True 981 else: 982 match_filter = lambda _xit: _xit.consumingState().match(symbol) 983 update_filter = lambda _xit: _xit.satisfiedBy(self) 984 985 if self.__state is None: 986 # Special-case the initial entry to the topmost configuration 987 transitions.extend(fac.initialTransitions) 988 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 989 # If there's an active subconfiguration that is not in an 990 # accepting state, we can't do anything at this level. 991 pass 992 else: 993 # Normally include transitions at this level, but in some 994 # cases they are not permitted. 995 include_local = True 996 if self.__subAutomata: 997 # Disallow transitions in this level if there are 998 # subautomata that require symbols before a transition 999 # out of this node is allowed. 1000 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata) 1001 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial)) 1002 if include_local: 1003 # Transitions within this layer 1004 for xit in filter(update_filter, self.__state.transitionSet): 1005 if xit.consumingState() is not None: 1006 transitions.append(xit) 1007 else: 1008 # The transition did not consume a symbol, so we have to find 1009 # one that does, from among the subautomata of the destination. 1010 # We do not care if the destination is nullable; alternatives 1011 # to it are already being handled with different transitions. 1012 (_, sub_initial) = xit.destination.subAutomataInitialTransitions() 1013 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial)) 1014 if (self.__superConfiguration is not None) and self.isAccepting(): 1015 # Transitions that leave this automaton 1016 lxit = self.makeLeaveAutomatonTransition() 1017 supxit = self.__superConfiguration.candidateTransitions(symbol) 1018 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit)) 1019 assert len(frozenset(transitions)) == len(transitions) 1020 return filter(update_filter, filter(match_filter, transitions))
1021
1022 - def acceptableSymbols (self):
1023 return map(lambda _xit: _xit.consumedSymbol(), self.candidateTransitions())
1024
1025 - def step (self, symbol):
1026 transitions = self.candidateTransitions(symbol) 1027 if 0 == len(transitions): 1028 raise UnrecognizedSymbolError(self, symbol) 1029 if 1 < len(transitions): 1030 raise NondeterministicSymbolError(self, symbol) 1031 return transitions[0].apply(self)
1032
1033 - def isInitial (self):
1034 """Return C{True} iff no transitions have ever been made.""" 1035 return self.__state is None
1036
1037 - def isAccepting (self):
1038 """Return C{True} iff the automaton is in an accepting state.""" 1039 if self.__state is not None: 1040 # Any active sub-configuration must be accepting 1041 if (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 1042 return False 1043 # Any unprocessed sub-automata must be nullable 1044 if self.__subAutomata is not None: 1045 if not reduce(operator.and_, map(lambda _sa: _sa.nullable, self.__subAutomata), True): 1046 return False 1047 # This state must be accepting 1048 return self.__state.isAccepting(self.__counterValues) 1049 # Accepting without any action requires nullable automaton 1050 return self.__automaton.nullable
1051
1052 - def __init__ (self, automaton, super_configuration=None):
1053 self.__automaton = automaton 1054 self.__superConfiguration = super_configuration 1055 self.reset()
1056
1057 - def clone (self, clone_map=None):
1058 """Clone a configuration and its descendents. 1059 1060 This is used for parallel execution where a configuration has 1061 multiple candidate transitions and must follow all of them. 1062 It clones the entire chain of configurations through 1063 multiple layers. 1064 1065 @param clone_map: Optional map into which the translation from 1066 the original configuration object to the corresponding cloned 1067 configuration object can be reconstructed, e.g. when applying 1068 a transition that includes automata exits referencing 1069 superconfigurations from the original configuration. 1070 """ 1071 if clone_map is None: 1072 clone_map = {} 1073 root = self 1074 while root.__superConfiguration is not None: 1075 root = root.__superConfiguration 1076 root = root._clone(clone_map, None) 1077 return clone_map.get(self)
1078
1079 - def _clone (self, clone_map, super_configuration):
1080 assert not self in clone_map 1081 other = type(self)(self.__automaton) 1082 clone_map[self] = other 1083 other.__state = self.__state 1084 other.__counterValues = self.__counterValues.copy() 1085 other.__superConfiguration = super_configuration 1086 if self.__subAutomata is not None: 1087 other.__subAutomata = self.__subAutomata.copy() 1088 if self.__subConfiguration: 1089 other.__subConfiguration = self.__subConfiguration._clone(clone_map, other) 1090 return other
1091
1092 - def __str__ (self):
1093 return '%s: %s' % (self.__state, ' ; '.join([ '%s=%u' % (_c,_v) for (_c,_v) in self.__counterValues.iteritems()]))
1094
1095 -class MultiConfiguration (Configuration_ABC):
1096 """Support parallel execution of state machine. 1097 1098 This holds a set of configurations, and executes each transition 1099 on each one. Configurations which fail to accept a step are 1100 silently dropped; only if this results in no remaining 1101 configurations will L{UnrecognizedSymbolError} be raised. If a 1102 step admits multiple valid transitions, a configuration is added 1103 for each one. 1104 1105 See L{pyxb.binding.content.AutomatonConfiguration} for an 1106 alternative solution which holds actions associated with the 1107 transition until the non-determinism is resolved.""" 1108 1109 __configurations = None 1110
1111 - def __init__ (self, configuration):
1113
1114 - def acceptableSymbols (self):
1115 acceptable = [] 1116 for cfg in self.__configurations: 1117 acceptable.extend(cfg.acceptableSymbols()) 1118 return acceptable
1119
1120 - def step (self, symbol):
1121 next_configs = [] 1122 for cfg in self.__configurations: 1123 transitions = cfg.candidateTransitions(symbol) 1124 if 0 == len(transitions): 1125 pass 1126 elif 1 == len(transitions): 1127 next_configs.append(transitions[0].apply(cfg)) 1128 else: 1129 for transition in transitions: 1130 clone_map = {} 1131 ccfg = cfg.clone(clone_map) 1132 next_configs.append(transition.apply(ccfg, clone_map)) 1133 if 0 == len(next_configs): 1134 raise UnrecognizedSymbolError(self, symbol) 1135 assert len(frozenset(next_configs)) == len(next_configs) 1136 self.__configurations = next_configs 1137 return self
1138
1139 - def acceptingConfigurations (self):
1140 """Return the set of configurations that are in an accepting state. 1141 1142 Note that some of the configurations may be within a 1143 sub-automaton; their presence in the return value is because 1144 the root configuration is also accepting.""" 1145 accepting = [] 1146 for cfg in self.__configurations: 1147 rcfg = cfg 1148 # Rule out configurations that are accepting within their 1149 # automaton, but not in the containing automaton. 1150 while rcfg.superConfiguration is not None: 1151 rcfg = rcfg.superConfiguration 1152 if rcfg.isAccepting(): 1153 accepting.append(cfg) 1154 return accepting
1155
1156 -class Automaton (object):
1157 """Representation of a Finite Automaton with Counters. 1158 1159 This has all the standard FAC elements, plus links to other 1160 states/automata as required to support the nested automata 1161 construct used for matching unordered catenation terms.""" 1162 __states = None
1163 - def __get_states (self):
1164 """The set of L{State}s in the automaton. 1165 1166 These correspond essentially to marked symbols in the original 1167 regular expression, or L{element 1168 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in 1169 an XML schema. 1170 1171 @note: These are conceptually a set and are stored that way. 1172 When an L{Automaton} is constructed the incoming states should 1173 be passed as a list so the calculated initial transitions are 1174 executed in a deterministic order.""" 1175 return self.__states
1176 states = property(__get_states) 1177 1178 __counterConditions = None
1179 - def __get_counterConditions (self):
1180 """The set of L{CounterCondition}s in the automaton. 1181 1182 These are marked positions in the regular expression, or 1183 L{particles<pyxb.xmlschema.structures.Particle>} in an XML 1184 schema, paired with their occurrence constraints.""" 1185 return self.__counterConditions
1186 counterConditions = property(__get_counterConditions) 1187 1188 __nullable = None
1189 - def __get_nullable (self):
1190 """C{True} iff the automaton accepts the empty string.""" 1191 return self.__nullable
1192 nullable = property(__get_nullable) 1193 1194 __initialTransitions = None
1195 - def __get_initialTransitions (self):
1196 """The set of transitions that may be made to enter the automaton. 1197 1198 These are full transitions, including chains into subautomata 1199 if an initial state represents a node with sub-automata. 1200 1201 @note: As with L{State.transitionSet}, the set is represented 1202 as a list to preserve priority when resolving 1203 non-deterministic matches.""" 1204 return self.__initialTransitions
1205 initialTransitions = property(__get_initialTransitions) 1206 1207 __containingState = None
1208 - def __get_containingState (self):
1209 """The L{State} instance for which this is a sub-automaton. 1210 1211 C{None} if this is not a sub-automaton.""" 1212 return self.__containingState
1213 containingState = property(__get_containingState) 1214 1215 __finalStates = None
1216 - def __get_finalStates (self):
1217 """The set of L{State} members which can terminate a match.""" 1218 return self.__finalStates
1219 finalStates = property(__get_finalStates) 1220
1221 - def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1222 self.__states = frozenset(states) 1223 map(lambda _s: _s._set_automaton(self), self.__states) 1224 self.__counterConditions = frozenset(counter_conditions) 1225 self.__nullable = nullable 1226 self.__containingState = containing_state 1227 xit = [] 1228 fnl = set() 1229 # Iterate over states, not self.__states, in case the input was a list. 1230 # This way we preserve the priority for initial transitions. 1231 for s in states: 1232 if s.isInitial: 1233 xit.extend(s.automatonEntryTransitions) 1234 if s.finalUpdate is not None: 1235 fnl.add(s) 1236 self.__initialTransitions = xit 1237 self.__finalStates = frozenset(fnl)
1238
1239 - def newConfiguration (self):
1240 """Return a new L{Configuration} instance for this automaton.""" 1241 return Configuration(self)
1242
1243 - def __str__ (self):
1244 rv = [] 1245 rv.append('sigma = %s' % (' '.join(map(lambda _s: str(_s.symbol), self.__states)))) 1246 rv.append('states = %s' % (' '.join(map(str, self.__states)))) 1247 for s in self.__states: 1248 if s.subAutomata is not None: 1249 for i in xrange(len(s.subAutomata)): 1250 rv.append('SA %s.%u is %x:\n ' % (str(s), i, id(s.subAutomata[i])) + '\n '.join(str(s.subAutomata[i]).split('\n'))) 1251 rv.append('counters = %s' % (' '.join(map(str, self.__counterConditions)))) 1252 rv.append('initial = %s' % (' ; '.join([ '%s on %s' % (_s, _s.symbol) for _s in filter(lambda _s: _s.isInitial, self.__states)]))) 1253 rv.append('initial transitions:\n%s' % ('\n'.join(map(str, self.initialTransitions)))) 1254 rv.append('States:') 1255 for s in self.__states: 1256 rv.append('%s: %s' % (s, s._facText())) 1257 return '\n'.join(rv)
1258
1259 -class Node (object):
1260 """Abstract class for any node in the term tree. 1261 1262 In its original form a B{position} (C{pos}) is a tuple of 1263 non-negative integers comprising a path from a node in the term 1264 tree. It identifies a node in the tree. After the FAC has been 1265 constructed, only positions that are leaf nodes in the term tree 1266 remain, and the corresponding symbol value (Python instance) is 1267 used as the position. 1268 1269 An B{update instruction} (C{psi}) is a map from positions to 1270 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions 1271 to be taken on the counter states corresponding to the positions 1272 in its domain. 1273 1274 A B{transition} is a pair containing a position and an update 1275 instruction. It identifies a potential next node in the state and 1276 the updates that are to be performed if the transition is taken. 1277 1278 A B{follow value} is a map from a position to a set of transitions 1279 that may originate from the pos. This set is represented as a 1280 Python list since update instructions are dicts and cannot be 1281 hashed. 1282 """ 1283 1284 _Precedence = None 1285 """An integral value used for parenthesizing expressions. 1286 1287 A subterm that has a precedence less than that of its containing 1288 term must be enclosed in parentheses when forming a text 1289 expression representing the containing term.""" 1290 1291 RESET = False 1292 """An arbitrary value representing reset of a counter.""" 1293 1294 INCREMENT = True 1295 """An arbitrary value representing increment of a counter.""" 1296
1297 - def __init__ (self, **kw):
1298 """Create a FAC term-tree node. 1299 1300 @keyword metadata: Any application-specific metadata retained in 1301 the term tree for transfer to the resulting automaton.""" 1302 self.__metadata = kw.get('metadata')
1303
1304 - def clone (self, *args, **kw):
1305 """Create a deep copy of the node. 1306 1307 All term-tree--related attributes and properties are replaced 1308 with deep clones. Other attributes are preserved. 1309 1310 @param args: A tuple of arguments to be passed to the instance 1311 constructor. 1312 1313 @param kw: A dict of keywords to be passed to the instance 1314 constructor. 1315 1316 @note: Subclasses should pre-extend this method to augment the 1317 C{args} and C{kw} parameters as necessary to match the 1318 expectations of the C{__init__} method of the class being 1319 cloned.""" 1320 kw.setdefault('metadata', self.metadata) 1321 return type(self)(*args, **kw)
1322 1323 __metadata = None
1324 - def __get_metadata (self):
1325 """Application-specific metadata provided during construction.""" 1326 return self.__metadata
1327 metadata = property(__get_metadata) 1328 1329 __first = None
1330 - def __get_first (self):
1331 """The I{first} set for the node. 1332 1333 This is the set of positions leading to symbols that can 1334 appear first in a string matched by an execution starting at 1335 the node.""" 1336 if self.__first is None: 1337 self.__first = frozenset(self._first()) 1338 return self.__first
1339 first = property(__get_first) 1340
1341 - def _first (self):
1342 """Abstract method that defines L{first} for the subclass. 1343 1344 The return value should be an iterable of tuples of integers 1345 denoting paths from this node through the term tree to a 1346 symbol.""" 1347 raise NotImplementedError('%s.first' % (type(self).__name__,))
1348 1349 __last = None
1350 - def __get_last (self):
1351 """The I{last} set for the node. 1352 1353 This is the set of positions leading to symbols that can 1354 appear last in a string matched by an execution starting at 1355 the node.""" 1356 if self.__last is None: 1357 self.__last = frozenset(self._last()) 1358 return self.__last
1359 last = property(__get_last) 1360
1361 - def _last (self):
1362 """Abstract method that defines L{last} for the subclass. 1363 1364 The return value should be an iterable of tuples of integers 1365 denoting paths from this node through the term tree to a 1366 symbol.""" 1367 raise NotImplementedError('%s.last' % (type(self).__name__,))
1368 1369 __nullable = None
1370 - def __get_nullable (self):
1371 """C{True} iff the empty string is accepted by this node.""" 1372 if self.__nullable is None: 1373 self.__nullable = self._nullable() 1374 return self.__nullable
1375 nullable = property(__get_nullable) 1376
1377 - def _nullable (self):
1378 """Abstract method that defines L{nullable} for the subclass. 1379 1380 The return value should be C{True} or C{False}.""" 1381 raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1382 1383 __follow = None
1384 - def __get_follow (self):
1385 """The I{follow} map for the node.""" 1386 if self.__follow is None: 1387 self.__follow = self._follow() 1388 return self.__follow
1389 follow = property(__get_follow) 1390
1391 - def _follow (self):
1392 """Abstract method that defines L{follow} for the subclass. 1393 1394 The return value should be a map from tuples of integers (positions) 1395 to a list of transitions, where a transition is a position and 1396 an update instruction.""" 1397 raise NotImplementedError('%s.follow' % (type(self).__name__,))
1398
1399 - def reset (self):
1400 """Reset any term-tree state associated with the node. 1401 1402 Any change to the structure of the term tree in which the node 1403 appears invalidates memoized first/follow sets and related 1404 information. This method clears all that data so it can be 1405 recalculated. It does not clear the L{metadata} link, or any 1406 existing structural data.""" 1407 self.__first = None 1408 self.__last = None 1409 self.__nullable = None 1410 self.__follow = None 1411 self.__counterPositions = None
1412
1413 - def walkTermTree (self, pre, post, arg):
1414 """Utility function for term tree processing. 1415 1416 @param pre: a callable that, unless C{None}, is invoked at 1417 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where 1418 C{pos} is the tuple of integers identifying the path from the 1419 node at on which this method was invoked to the node being 1420 processed. The invocation occurs before processing any 1421 subordinate nodes. 1422 1423 @param post: as with C{pre} but invocation occurs after 1424 processing any subordinate nodes. 1425 1426 @param arg: a value passed to invocations of C{pre} and 1427 C{post}.""" 1428 self._walkTermTree((), pre, post, arg)
1429
1430 - def _walkTermTree (self, position, pre, post, arg):
1431 """Abstract method implementing L{walkTermTree} for the subclass.""" 1432 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1433 1434 __posNodeMap = None
1435 - def __get_posNodeMap (self):
1436 """A map from positions to nodes in the term tree.""" 1437 if self.__posNodeMap is None: 1438 pnm = { } 1439 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm) 1440 self.__posNodeMap = pnm 1441 return self.__posNodeMap
1442 posNodeMap = property(__get_posNodeMap) 1443 1444 __nodePosMap = None
1445 - def __get_nodePosMap (self):
1446 """A map from nodes to their position in the term tree.""" 1447 if self.__nodePosMap is None: 1448 npm = {} 1449 for (p,n) in self.posNodeMap.iteritems(): 1450 npm[n] = p 1451 self.__nodePosMap = npm 1452 return self.__nodePosMap
1453 nodePosMap = property(__get_nodePosMap) 1454 1455 @classmethod
1456 - def _PosConcatPosSet (cls, pos, pos_set):
1457 """Implement definition 11.1 in B{HOV09}.""" 1458 return frozenset([ pos + _mp for _mp in pos_set ])
1459 1460 @classmethod
1461 - def _PosConcatUpdateInstruction (cls, pos, psi):
1462 """Implement definition 11.2 in B{HOV09}""" 1463 rv = {} 1464 for (q, v) in psi.iteritems(): 1465 rv[pos + q] = v 1466 return rv
1467 1468 @classmethod
1469 - def _PosConcatTransitionSet (cls, pos, transition_set):
1470 """Implement definition 11.3 in B{HOV09}""" 1471 ts = [] 1472 for (q, psi) in transition_set: 1473 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) )) 1474 return ts
1475
1476 - def __resetAndValidate (self, node, pos, visited_nodes):
1477 if node in visited_nodes: 1478 raise InvalidTermTreeError(self, node) 1479 node.reset() 1480 visited_nodes.add(node)
1481
1482 - def buildAutomaton (self, state_ctor=State, ctr_cond_ctor=CounterCondition, containing_state=None):
1483 # Validate that the term tree is in fact a tree. A DAG does 1484 # not work. If the tree had cycles, the automaton build 1485 # wouldn't even return. 1486 self.walkTermTree(self.__resetAndValidate, None, set()) 1487 1488 counter_map = { } 1489 for pos in self.counterPositions: 1490 nci = self.posNodeMap.get(pos) 1491 assert isinstance(nci, NumericalConstraint) 1492 assert nci not in counter_map 1493 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata) 1494 counters = counter_map.values() 1495 1496 state_map = { } 1497 for pos in self.follow.iterkeys(): 1498 sym = self.posNodeMap.get(pos) 1499 assert isinstance(sym, LeafNode) 1500 assert sym not in state_map 1501 1502 # The state may be an initial state if it is in the first 1503 # set for the root of the term tree. 1504 is_initial = pos in self.first 1505 1506 # The state may be a final state if it is nullable or is 1507 # in the last set of the term tree. 1508 final_update = None 1509 if (() == pos and sym.nullable) or (pos in self.last): 1510 # Acceptance is further constrained by the counter 1511 # values satisfying an update rule that would reset 1512 # all counters that are relevant at the state. 1513 final_update = set() 1514 for nci in map(counter_map.get, self.counterSubPositions(pos)): 1515 final_update.add(UpdateInstruction(nci, False)) 1516 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All)) 1517 if isinstance(sym, All): 1518 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms)) 1519 states = state_map.values() 1520 1521 for (spos, transition_set) in self.follow.iteritems(): 1522 src = state_map[spos] 1523 phi = [] 1524 for (dpos, psi) in transition_set: 1525 dst = state_map[dpos] 1526 uiset = set() 1527 for (counter, action) in psi.iteritems(): 1528 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action)) 1529 phi.append(Transition(dst, uiset)) 1530 src._set_transitionSet(phi) 1531 1532 return Automaton(states, counters, self.nullable, containing_state=containing_state)
1533 1534 __counterPositions = None
1535 - def __get_counterPositions (self):
1536 """Implement definition 13.1 from B{HOV09}. 1537 1538 The return value is the set of all positions leading to 1539 L{NumericalConstraint} nodes for which either the minimum 1540 value is not 1 or the maximum value is not unbounded.""" 1541 if self.__counterPositions is None: 1542 cpos = [] 1543 self.walkTermTree(lambda _n,_p,_a: \ 1544 isinstance(_n, NumericalConstraint) \ 1545 and ((1 != _n.min) \ 1546 or (_n.max is not None)) \ 1547 and _a.append(_p), 1548 None, cpos) 1549 self.__counterPositions = frozenset(cpos) 1550 return self.__counterPositions
1551 counterPositions = property(__get_counterPositions) 1552
1553 - def counterSubPositions (self, pos):
1554 """Implement definition 13.2 from B{HOV09}. 1555 1556 This is the subset of L{counterPositions} that occur along the 1557 path to C{pos}.""" 1558 rv = set() 1559 for cpos in self.counterPositions: 1560 if cpos == pos[:len(cpos)]: 1561 rv.add(cpos) 1562 return frozenset(rv)
1563
1564 - def _facToString (self):
1565 """Obtain a description of the FAC in text format. 1566 1567 This is a diagnostic tool, returning first, last, and follow 1568 maps using positions.""" 1569 rv = [] 1570 rv.append('r\t= %s' % (str(self),)) 1571 states = self.follow.keys() 1572 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states))))) 1573 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first)))) 1574 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last)))) 1575 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions)))) 1576 for pos in self.first: 1577 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos))) 1578 for spos in states: 1579 for (dpos, transition_set) in self.follow[spos]: 1580 dst = self.posNodeMap[dpos] 1581 uv = [] 1582 for (c, u) in transition_set.iteritems(): 1583 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c))) 1584 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv))) 1585 return '\n'.join(rv)
1586
1587 -class MultiTermNode (Node):
1588 """Intermediary for nodes that have multiple child nodes.""" 1589 1590 __terms = None
1591 - def __get_terms (self):
1592 """The set of subordinate terms of the current node.""" 1593 return self.__terms
1594 terms = property(__get_terms) 1595
1596 - def __init__ (self, *terms, **kw):
1597 """Term that collects an ordered sequence of terms. 1598 1599 The terms are provided as arguments. All must be instances of 1600 a subclass of L{Node}.""" 1601 super(MultiTermNode, self).__init__(**kw) 1602 self.__terms = terms
1603
1604 - def clone (self):
1605 cterms = map(lambda _s: _s.clone(), self.__terms) 1606 return super(MultiTermNode, self).clone(*cterms)
1607
1608 - def _walkTermTree (self, position, pre, post, arg):
1609 if pre is not None: 1610 pre(self, position, arg) 1611 for c in xrange(len(self.__terms)): 1612 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg) 1613 if post is not None: 1614 post(self, position, arg)
1615
1616 -class LeafNode (Node):
1617 """Intermediary for nodes that have no child nodes."""
1618 - def _first (self):
1619 return [()]
1620 - def _last (self):
1621 return [()]
1622 - def _nullable (self):
1623 return False
1624 - def _follow (self):
1625 return { (): frozenset() }
1626
1627 - def _walkTermTree (self, position, pre, post, arg):
1628 if pre is not None: 1629 pre(self, position, arg) 1630 if post is not None: 1631 post(self, position, arg)
1632
1633 -class NumericalConstraint (Node):
1634 """A term with a numeric range constraint. 1635 1636 This corresponds to a "particle" in the XML Schema content model.""" 1637 1638 _Precedence = -1 1639 1640 __min = None
1641 - def __get_min (self):
1642 return self.__min
1643 min = property(__get_min) 1644 1645 __max = None
1646 - def __get_max (self):
1647 return self.__max
1648 max = property(__get_max) 1649 1650 __term = None
1651 - def __get_term (self):
1652 return self.__term
1653 term = property(__get_term) 1654
1655 - def __init__ (self, term, min=0, max=1, **kw):
1656 """Term with a numerical constraint. 1657 1658 @param term: A term, the number of appearances of which is 1659 constrained in this term. 1660 @type term: L{Node} 1661 1662 @keyword min: The minimum number of occurrences of C{term}. 1663 The value must be non-negative. 1664 1665 @keyword max: The maximum number of occurrences of C{term}. 1666 The value must be positive (in which case it must also be no 1667 smaller than C{min}), or C{None} to indicate an unbounded 1668 number of occurrences.""" 1669 super(NumericalConstraint, self).__init__(**kw) 1670 self.__term = term 1671 self.__min = min 1672 self.__max = max
1673
1674 - def clone (self):
1675 return super(NumericalConstraint, self).clone(self.__term, self.__min, self.__max)
1676
1677 - def _first (self):
1678 return [ (0,) + _fc for _fc in self.__term.first ]
1679
1680 - def _last (self):
1681 return [ (0,) + _lc for _lc in self.__term.last ]
1682
1683 - def _nullable (self):
1684 return (0 == self.__min) or self.__term.nullable
1685
1686 - def _follow (self):
1687 rv = {} 1688 pp = (0,) 1689 last_r1 = set(self.__term.last) 1690 for (q, transition_set) in self.__term.follow.iteritems(): 1691 rv[pp+q] = self._PosConcatTransitionSet(pp, transition_set) 1692 if q in last_r1: 1693 last_r1.remove(q) 1694 for sq1 in self.__term.first: 1695 q1 = pp+sq1 1696 psi = {} 1697 for p1 in self.__term.counterSubPositions(q): 1698 psi[pp+p1] = self.RESET 1699 if (1 != self.min) or (self.max is not None): 1700 psi[()] = self.INCREMENT 1701 rv[pp+q].append((q1, psi)) 1702 assert not last_r1 1703 return rv
1704
1705 - def _walkTermTree (self, position, pre, post, arg):
1706 if pre is not None: 1707 pre(self, position, arg) 1708 self.__term._walkTermTree(position + (0,), pre, post, arg) 1709 if post is not None: 1710 post(self, position, arg)
1711
1712 - def __str__ (self):
1713 rv = str(self.__term) 1714 if self.__term._Precedence < self._Precedence: 1715 rv = '(' + rv + ')' 1716 rv += '^(%u,' % (self.__min,) 1717 if self.__max is not None: 1718 rv += '%u' % (self.__max) 1719 return rv + ')'
1720
1721 -class Choice (MultiTermNode):
1722 """A term that may be any one of a set of terms. 1723 1724 This term matches if any one of its contained terms matches.""" 1725 1726 _Precedence = -3 1727
1728 - def __init__ (self, *terms, **kw):
1729 """Term that selects one of a set of terms. 1730 1731 The terms are provided as arguments. All must be instances of 1732 a subclass of L{Node}.""" 1733 super(Choice, self).__init__(*terms, **kw)
1734
1735 - def _first (self):
1736 rv = set() 1737 for c in xrange(len(self.terms)): 1738 rv.update([ (c,) + _fc for _fc in self.terms[c].first]) 1739 return rv
1740
1741 - def _last (self):
1742 rv = set() 1743 for c in xrange(len(self.terms)): 1744 rv.update([ (c,) + _lc for _lc in self.terms[c].last]) 1745 return rv
1746
1747 - def _nullable (self):
1748 for t in self.terms: 1749 if t.nullable: 1750 return True 1751 return False
1752
1753 - def _follow (self):
1754 rv = {} 1755 for c in xrange(len(self.terms)): 1756 for (q, transition_set) in self.terms[c].follow.iteritems(): 1757 pp = (c,) 1758 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1759 return rv
1760
1761 - def __str__ (self):
1762 elts = [] 1763 for t in self.terms: 1764 if t._Precedence < self._Precedence: 1765 elts.append('(' + str(t) + ')') 1766 else: 1767 elts.append(str(t)) 1768 return '+'.join(elts)
1769
1770 -class Sequence (MultiTermNode):
1771 """A term that is an ordered sequence of terms.""" 1772 1773 _Precedence = -2 1774
1775 - def __init__ (self, *terms, **kw):
1776 """Term that collects an ordered sequence of terms. 1777 1778 The terms are provided as arguments. All must be instances of 1779 a subclass of L{Node}.""" 1780 super(Sequence, self).__init__(*terms, **kw)
1781
1782 - def _first (self):
1783 rv = set() 1784 c = 0 1785 while c < len(self.terms): 1786 t = self.terms[c] 1787 rv.update([ (c,) + _fc for _fc in t.first]) 1788 if not t.nullable: 1789 break 1790 c += 1 1791 return rv
1792
1793 - def _last (self):
1794 rv = set() 1795 c = len(self.terms) - 1; 1796 while 0 <= c: 1797 t = self.terms[c] 1798 rv.update([ (c,) + _lc for _lc in t.last]) 1799 if not t.nullable: 1800 break 1801 c -= 1 1802 return rv
1803
1804 - def _nullable (self):
1805 for t in self.terms: 1806 if not t.nullable: 1807 return False 1808 return True
1809
1810 - def _follow (self):
1811 rv = {} 1812 for c in xrange(len(self.terms)): 1813 pp = (c,) 1814 for (q, transition_set) in self.terms[c].follow.iteritems(): 1815 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1816 for c in xrange(len(self.terms)-1): 1817 t = self.terms[c] 1818 pp = (c,) 1819 # Link from the last of one term to the first of the next term. 1820 # Repeat while the destination term is nullable and there are 1821 # successor terms. 1822 for q in t.last: 1823 psi = {} 1824 for p1 in t.counterSubPositions(q): 1825 psi[pp + p1] = self.RESET 1826 nc = c 1827 while nc+1 < len(self.terms): 1828 nc += 1 1829 nt = self.terms[nc] 1830 for sq1 in nt.first: 1831 q1 = (nc,) + sq1 1832 rv[pp+q].append((q1, psi)) 1833 if not nt.nullable: 1834 break 1835 return rv
1836
1837 - def __str__ (self):
1838 elts = [] 1839 for t in self.terms: 1840 if t._Precedence < self._Precedence: 1841 elts.append('(' + str(t) + ')') 1842 else: 1843 elts.append(str(t)) 1844 return '.'.join(elts)
1845
1846 -class All (MultiTermNode, LeafNode):
1847 """A term that is an unordered sequence of terms. 1848 1849 Note that the inheritance structure for this node is unusual. It 1850 has multiple children when it is treated as a term tree, but is 1851 considered a leaf node when constructing an automaton. 1852 """ 1853 1854 _Precedence = 0 1855
1856 - def __init__ (self, *terms, **kw):
1857 """Term that collects an unordered sequence of terms. 1858 1859 The terms are provided as arguments. All must be instances of 1860 a subclass of L{Node}.""" 1861 super(All, self).__init__(*terms, **kw)
1862
1863 - def _nullable (self):
1864 for t in self.terms: 1865 if not t.nullable: 1866 return False 1867 return True
1868 1869 @classmethod
1870 - def CreateTermTree (cls, *terms):
1871 """Create a term tree that implements unordered catenation of 1872 the terms. 1873 1874 This expansion results in a standard choice/sequence term 1875 tree, at the cost of quadratic state expansion because terms 1876 are L{cloned<Node.clone>} as required to satisfy the tree 1877 requirements of the term tree. 1878 1879 @param terms: The tuple of terms that are elements of an 1880 accepted sequence. 1881 1882 @return: A term tree comprising a choice between sequences 1883 that connect each term to the unordered catenation of the 1884 remaining terms.""" 1885 if 1 == len(terms): 1886 return terms[0] 1887 disjuncts = [] 1888 for i in xrange(len(terms)): 1889 n = terms[i] 1890 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:]) 1891 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem))) 1892 return Choice(*disjuncts)
1893
1894 - def __str__ (self):
1895 return u'&(' + ','.join([str(_t) for _t in self.terms]) + ')'
1896
1897 -class Symbol (LeafNode):
1898 """A leaf term that is a symbol. 1899 1900 The symbol is represented by the L{metadata} field.""" 1901 1902 _Precedence = 0 1903
1904 - def __init__ (self, symbol, **kw):
1905 kw['metadata'] = symbol 1906 super(Symbol, self).__init__(**kw)
1907
1908 - def clone (self):
1909 return super(Symbol, self).clone(self.metadata)
1910
1911 - def __str__ (self):
1912 return str(self.metadata)
1913