Package pyxb :: Package utils :: Module fac
[hide private]
[frames] | no frames]

Source Code for Module pyxb.utils.fac

   1  # -*- coding: utf-8 -*- 
   2  # Copyright 2012, Peter A. Bigot 
   3  # 
   4  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
   5  # not use this file except in compliance with the License. You may obtain a 
   6  # copy of the License at: 
   7  # 
   8  #            http://www.apache.org/licenses/LICENSE-2.0 
   9  # 
  10  # Unless required by applicable law or agreed to in writing, software 
  11  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
  12  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
  13  # License for the specific language governing permissions and limitations 
  14  # under the License. 
  15   
  16  """This module provides Finite Automata with Counters. 
  17   
  18  FACs are type of state machine where a transition may include a 
  19  constraint and a modification to a set of counters.  They are used to 
  20  implement regular expressions with numerical constraints, as are found 
  21  in POSIX regexp, Perl, and XML schema. 
  22   
  23  The implementation here derives from U{Regular Expressions with 
  24  Numerical Constraints and Automata with Counters 
  25  <https://bora.uib.no/bitstream/1956/3628/3/Hovland_LNCS%205684.pdf>}, 
  26  Dag Hovland, Lecture Notes in Computer Science, 2009, Volume 5684, 
  27  Theoretical Aspects of Computing - ICTAC 2009, Pages 231-245.  In what 
  28  follows, this reference will be denoted B{HOV09}. 
  29   
  30  A regular expression is directly translated into a term tree, where 
  31  nodes are operators such as sequence, choice, and counter 
  32  restrictions, and the leaf nodes denote symbols in the language of the 
  33  regular expression. 
  34   
  35  In the case of XML content models, the symbols include L{element 
  36  declarations <pyxb.xmlschema.structures.ElementDeclaration>} and 
  37  L{wildcard elements <pyxb.xmlschema.structures.Wildcard>}.  A 
  38  numerical constraint node corresponds to an L{XML particle 
  39  <pyxb.xmlschema.structures.Particle>}, and choice and sequence nodes 
  40  derive from L{model groups <pyxb.xmlschema.structures.ModelGroup>} of 
  41  types B{choice} and B{sequence}.  As suggested in U{The Membership 
  42  Problem for Regular Expressions with Unordered Concatenation and 
  43  Numerical Constraints <http://www.ii.uib.no/~dagh/presLATA2012.pdf>} 
  44  the B{all} content model can be translated into state machine using 
  45  choice and sequence at the cost of a quadratic size explosion.  Since 
  46  some XML content models might have a hundred terms in an unordered 
  47  catenation, this is not acceptable, and the implementation here 
  48  optimizes this construct by creating a leaf node in the automaton 
  49  which in turn contains sub-automata for each term, and permits an exit 
  50  transition only when all the terms that are required have been 
  51  completed. 
  52   
  53  @note: In XSD 1.1 the restriction that terms in an B{all} model group 
  54  occur at most once has been removed.  Since the current implementation 
  55  removes a completed term from the set of available terms, this will 
  56  not work: instead the subconfiguration with its counter values must be 
  57  retained between matches. 
  58  """ 
  59   
  60  import operator 
  61  import functools 
  62  import logging 
  63   
  64  log_ = logging.getLogger(__name__) 
65 66 -class FACError (Exception):
67 pass
68
69 -class InvalidTermTreeError (FACError):
70 """Exception raised when a FAC term tree is not a tree. 71 72 For example, a L{Symbol} node appears multiple times, or a cycle is detected.""" 73 74 parent = None 75 """The L{MultiTermNode} containing the term that proves invalidity""" 76 77 term = None 78 """The L{Node} that proves invalidity""" 79
80 - def __init__ (self, *args):
81 (self.parent, self.term) = args 82 super(InvalidTermTreeError, self).__init__(*args)
83
84 -class UpdateApplicationError (FACError):
85 """Exception raised when an unsatisfied update instruction is executed. 86 87 This indicates an internal error in the implementation.""" 88 89 update_instruction = None 90 """The L{UpdateInstruction} with an unsatisfied L{CounterCondition}""" 91 92 values = None 93 """The unsatisfying value map from L{CounterCondition} instances to integers""" 94
95 - def __init__ (self, *args):
96 (self.update_instruction, self.values) = args 97 super(UpdateApplicationError, self).__init__(*args)
98
99 -class AutomatonStepError (Exception):
100 """Symbol rejected by L{Configuration_ABC.step}. 101 102 The exception indicates that the proposed symbol either failed to 103 produce a transition (L{UnrecognizedSymbolError}) or produced 104 multiple equally valid transitions 105 (L{NondeterministicSymbolError}).""" 106 107 configuration = None 108 """The instance of L{Configuration_ABC} that raised the exception. 109 From L{Configuration_ABC.acceptableSymbols} you can determine what 110 alternatives might be present.""" 111 112 symbol = None 113 """The symbol that was not accepted.""" 114
115 - def __get_acceptable (self):
116 """A list of symbols that the configuration would accept in its current state.""" 117 return self.configuration.acceptableSymbols()
118 acceptable = property(__get_acceptable) 119
120 - def __init__ (self, *args):
121 (self.configuration, self.symbol) = args 122 super(AutomatonStepError, self).__init__(*args)
123
124 -class UnrecognizedSymbolError (AutomatonStepError):
125 """L{Configuration.step} failed to find a valid transition.""" 126 pass
127
128 -class NondeterministicSymbolError (AutomatonStepError):
129 """L{Configuration.step} found multiple transitions.""" 130 pass
131
132 -class SymbolMatch_mixin (object):
133 """Mix-in used by symbols to provide a custom match implementation. 134 135 If a L{State.symbol} value is an instance of this mix-in, then it 136 will be used to validate a candidate symbol for a match.""" 137
138 - def match (self, symbol):
139 raise NotImplementedError('%s.match' % (type(self).__name__,))
140
141 -class State (object):
142 """A thin wrapper around an object reference. 143 144 The state of the automaton corresponds to a position, or marked 145 symbol, in the term tree. Because the same symbol may appear at 146 multiple locations in the tree, and the distinction between these 147 positions is critical, a L{State} wrapper is provided to maintain 148 distinct values.""" 149
150 - def __init__ (self, symbol, is_initial, final_update=None, is_unordered_catenation=False):
151 """Create a FAC state. 152 153 @param symbol: The symbol associated with the state. 154 Normally initialized from the L{Symbol.metadata} value. The 155 state may be entered if, among other conditions, the L{match} 156 routine accepts the proposed input as being consistent with 157 this value. 158 159 @param is_initial: C{True} iff this state may serve as the 160 first state of the automaton. 161 162 @param final_update: C{None} if this state is not an 163 accepting state of the automaton; otherwise a set of 164 L{UpdateInstruction} values that must be satisfied by the 165 counter values in a configuration as a further restriction of 166 acceptance. 167 168 @param is_unordered_catenation: C{True} if this state has 169 subautomata that must be matched to execute the unordered 170 catenation of an L{All} node; C{False} if this is a regular 171 symbol.""" 172 self.__symbol = symbol 173 self.__isInitial = not not is_initial 174 self.__finalUpdate = final_update 175 self.__isUnorderedCatenation = is_unordered_catenation
176 177 __automaton = None
178 - def __get_automaton (self):
179 """Link to the L{Automaton} to which the state belongs.""" 180 return self.__automaton
181 - def _set_automaton (self, automaton):
182 """Method invoked during automaton construction to set state owner.""" 183 assert self.__automaton is None 184 self.__automaton = automaton 185 return self
186 automaton = property(__get_automaton) 187 188 __symbol = None
189 - def __get_symbol (self):
190 """Application-specific metadata identifying the symbol. 191 192 See also L{match}.""" 193 return self.__symbol
194 symbol = property(__get_symbol) 195 196 __isUnorderedCatenation = None
197 - def __get_isUnorderedCatenation (self):
198 """Indicate whether the state has subautomata for unordered 199 catenation. 200 201 To reduce state explosion due to non-determinism, such a state 202 executes internal transitions in subautomata until all terms 203 have matched or a failure is discovered.""" 204 return self.__isUnorderedCatenation
205 isUnorderedCatenation = property(__get_isUnorderedCatenation) 206 207 __subAutomata = None
208 - def __get_subAutomata (self):
209 """A sequence of sub-automata supporting internal state transitions. 210 211 This will return C{None} unless L{isUnorderedCatenation} is C{True}.""" 212 return self.__subAutomata
213 - def _set_subAutomata (self, *automata):
214 assert self.__subAutomata is None 215 assert self.__isUnorderedCatenation 216 self.__subAutomata = automata
217 subAutomata = property(__get_subAutomata) 218 219 __isInitial = None
220 - def __get_isInitial (self):
221 """C{True} iff this state may be the first state the automaton enters.""" 222 return self.__isInitial
223 isInitial = property(__get_isInitial) 224 225 __automatonEntryTransitions = None
227 """Return the set of initial transitions allowing entry to the automata through this state. 228 229 These are structurally-permitted transitions only, and must be 230 filtered based on the symbol that might trigger the 231 transition. The results are not filtered based on counter 232 value, since this value is used to determine how the 233 containing automaton might be entered. Consequently the 234 return value is the empty set unless this is an initial state. 235 236 The returned set is closed under entry to sub-automata, 237 i.e. it is guaranteed that each transition includes a 238 consuming state even if it requires a multi-element chain of 239 transitions into subautomata to reach one.""" 240 if self.__automatonEntryTransitions is None: 241 transitions = [] 242 if self.__isInitial: 243 xit = Transition(self, set()) 244 if self.__subAutomata is None: 245 transitions.append(xit) 246 else: 247 for sa in self.__subAutomata: 248 for saxit in sa.initialTransitions: 249 transitions.append(xit.chainTo(saxit.makeEnterAutomatonTransition())) 250 self.__automatonEntryTransitions = transitions 251 return self.__automatonEntryTransitions
252 automatonEntryTransitions = property(__get_automatonEntryTransitions) 253 254 __finalUpdate = None
255 - def __get_finalUpdate (self):
256 """Return the update instructions that must be satisfied for this to be a final state.""" 257 return self.__finalUpdate
258 finalUpdate = property(__get_finalUpdate) 259
260 - def subAutomataInitialTransitions (self, sub_automata=None):
261 """Return the set of candidate transitions to enter a sub-automaton of this state. 262 263 @param sub_automata: A subset of the sub-automata of this 264 state which should contribute to the result. If C{None}, all 265 sub-automata are used. 266 267 @return: A pair C{(nullable, transitions)} where C{nullable} 268 is C{True} iff there is at least one sub-automaton that is in 269 an accepting state on entry, and C{transitions} is a list of 270 L{Transition} instances describing how to reach some state in 271 a sub-automaton via a consumed symbol. 272 """ 273 assert self.__subAutomata is not None 274 is_nullable = True 275 transitions = [] 276 if sub_automata is None: 277 sub_automata = self.__subAutomata 278 for sa in sub_automata: 279 if not sa.nullable: 280 is_nullable = False 281 transitions.extend(sa.initialTransitions) 282 return (is_nullable, transitions)
283
284 - def isAccepting (self, counter_values):
285 """C{True} iff this state is an accepting state for the automaton. 286 287 @param counter_values: Counter values that further validate 288 whether the requirements of the automaton have been met. 289 290 @return: C{True} if this is an accepting state and the 291 counter values relevant at it are satisfied.""" 292 if self.__finalUpdate is None: 293 return False 294 return UpdateInstruction.Satisfies(counter_values, self.__finalUpdate)
295 296 __transitionSet = None
297 - def __get_transitionSet (self):
298 """Definitions of viable transitions from this state. 299 300 The transition set of a state is a set of L{Transition} nodes 301 identifying a state reachable in a single step from this 302 state, and a set of counter updates that must apply if the 303 transition is taken. 304 305 These transitions may not in themselves consume a symbol. For 306 example, if the destination state represents a match of an 307 L{unordered catenation of terms<All>}, then secondary 308 processing must be done to traverse into the automata for 309 those terms and identify transitions that include a symbol 310 consumption. 311 312 @note: Although conceptually the viable transitions are a set, 313 this implementation maintains them in a list so that order is 314 preserved when automata processing becomes non-deterministic. 315 PyXB is careful to build the transition list so that the 316 states are attempted in the order in which they appear in the 317 schema that define the automata. 318 """ 319 return self.__transitionSet
320 transitionSet = property(__get_transitionSet) 321
322 - def _set_transitionSet (self, transition_set):
323 """Method invoked during automaton construction to set the 324 legal transitions from the state. 325 326 The set of transitions cannot be defined until all states that 327 appear in it are available, so the creation of the automaton 328 requires that the association of the transition set be 329 delayed. (Though described as a set, the transitions are a 330 list where order reflects priority.) 331 332 @param transition_set: a list of pairs where the first 333 member is the destination L{State} and the second member is the 334 set of L{UpdateInstruction}s that apply when the automaton 335 transitions to the destination state.""" 336 337 self.__transitionSet = [] 338 seen = set() 339 for xit in transition_set: 340 if not (xit in seen): 341 seen.add(xit) 342 self.__transitionSet.append(xit)
343
344 - def match (self, symbol):
345 """Return C{True} iff the symbol matches for this state. 346 347 This may be overridden by subclasses when matching by 348 equivalence does not work. Alternatively, if the symbol 349 stored in this node is a subclass of L{SymbolMatch_mixin}, then 350 its match method will be used. Otherwise C{symbol} matches 351 only if it is equal to the L{symbol} of this state. 352 353 @param symbol: A candidate symbol corresponding to the 354 expression symbol for this state. 355 356 @return: C{True} iff C{symbol} is a match for this state. 357 """ 358 if isinstance(self.__symbol, SymbolMatch_mixin): 359 return self.__symbol.match(symbol) 360 return self.__symbol == symbol
361
362 - def __str__ (self):
363 return 'S.%x' % (id(self),)
364
365 - def _facText (self):
366 rv = [] 367 rv.extend(map(str, self.__transitionSet)) 368 if self.__finalUpdate is not None: 369 if 0 == len(self.__finalUpdate): 370 rv.append('Final (no conditions)') 371 else: 372 rv.append('Final if %s' % (' '.join(map(lambda _ui: str(_ui.counterCondition), self.__finalUpdate)))) 373 return '\n'.join(rv)
374
375 -class CounterCondition (object):
376 """A counter condition is a range limit on valid counter values. 377 378 Instances of this class serve as keys for the counters that 379 represent the configuration of a FAC. The instance also maintains 380 a pointer to application-specific L{metadata}.""" 381 382 __min = None
383 - def __get_min (self):
384 """The minimum legal value for the counter. 385 386 This is a non-negative integer.""" 387 return self.__min
388 min = property(__get_min) 389 390 __max = None
391 - def __get_max (self):
392 """The maximum legal value for the counter. 393 394 This is a positive integer, or C{None} to indicate that the 395 counter is unbounded.""" 396 return self.__max
397 max = property(__get_max) 398 399 __metadata = None
400 - def __get_metadata (self):
401 """A pointer to application metadata provided when the condition was created.""" 402 return self.__metadata
403 metadata = property(__get_metadata) 404
405 - def __init__ (self, min, max, metadata=None):
406 """Create a counter condition. 407 408 @param min: The value for L{min} 409 @param max: The value for L{max} 410 @param metadata: The value for L{metadata} 411 """ 412 self.__min = min 413 self.__max = max 414 self.__metadata = metadata
415
416 - def __hash__ (self):
417 return hash(self.__min) ^ hash(self.__max) ^ hash(self.__metadata)
418
419 - def __eq__ (self, other):
420 return (other is not None) \ 421 and (self.__min == other.__min) \ 422 and (self.__max == other.__max) \ 423 and (self.__metadata == other.__metadata)
424
425 - def __ne__ (self, other):
426 return not self.__eq__(other)
427
428 - def __str__ (self):
429 return 'C.%x{%s,%s}' % (id(self), self.min, self.max is not None and self.max or '')
430
431 -class UpdateInstruction:
432 """An update instruction pairs a counter with a mutation of that 433 counter. 434 435 The instruction is executed during a transition from one state to 436 another, and causes the corresponding counter to be incremented or 437 reset. The instruction may only be applied if doing so does not 438 violate the conditions of the counter it affects.""" 439 440 __counterCondition = None
441 - def __get_counterCondition (self):
442 """A reference to the L{CounterCondition} identifying the 443 counter to be updated. 444 445 The counter condition instance is used as a key to the 446 dictionary maintaining current counter values.""" 447 return self.__counterCondition
448 counterCondition = property(__get_counterCondition) 449 450 __doIncrement = None
451 - def __get_doIncrement (self):
452 """C{True} if the counter is to be incremented; C{False} if it is to be reset.""" 453 return self.__doIncrement
454 doIncrement = property(__get_doIncrement) 455 456 # Cached values extracted from counter condition 457 __min = None 458 __max = None 459
460 - def __init__ (self, counter_condition, do_increment):
461 """Create an update instruction. 462 463 @param counter_condition: A L{CounterCondition} identifying a 464 minimum and maximum value for a counter, and serving as a map 465 key for the value of the corresponding counter. 466 467 @param do_increment: C{True} if the update is to increment 468 the value of the counter; C{False} if the update is to reset 469 the counter. 470 """ 471 self.__counterCondition = counter_condition 472 self.__doIncrement = not not do_increment 473 self.__min = counter_condition.min 474 self.__max = counter_condition.max
475
476 - def satisfiedBy (self, counter_values):
477 """Implement a component of definition 5 from B{HOV09}. 478 479 The update instruction is satisfied by the counter values if 480 its action may be legitimately applied to the value of its 481 associated counter. 482 483 @param counter_values: A map from L{CounterCondition}s to 484 non-negative integers 485 486 @return: C{True} or C{False} 487 """ 488 value = counter_values[self.__counterCondition] 489 if self.__doIncrement \ 490 and (self.__max is not None) \ 491 and (value >= self.__max): 492 return False 493 if (not self.__doIncrement) \ 494 and (value < self.__min): 495 return False 496 return True
497 498 @classmethod
499 - def Satisfies (cls, counter_values, update_instructions):
500 """Return C{True} iff the counter values satisfy the update 501 instructions. 502 503 @param counter_values: A map from L{CounterCondition} to 504 integer counter values 505 506 @param update_instructions: A set of L{UpdateInstruction} 507 instances 508 509 @return: C{True} iff all instructions are satisfied by the 510 values and limits.""" 511 for psi in update_instructions: 512 if not psi.satisfiedBy(counter_values): 513 return False 514 return True
515
516 - def apply (self, counter_values):
517 """Apply the update instruction to the provided counter values. 518 519 @param counter_values: A map from L{CounterCondition} to 520 integer counter values. This map is updated in-place.""" 521 if not self.satisfiedBy(counter_values): 522 raise UpdateApplicationError(self, counter_values) 523 value = counter_values[self.__counterCondition] 524 if self.__doIncrement: 525 value += 1 526 else: 527 value = 1 528 counter_values[self.__counterCondition] = value
529 530 @classmethod
531 - def Apply (cls, update_instructions, counter_values):
532 """Apply the update instructions to the counter values. 533 534 @param update_instructions: A set of L{UpdateInstruction} 535 instances. 536 537 @param counter_values: A map from L{CounterCondition} 538 instances to non-negative integers. This map is updated 539 in-place by applying each instruction in 540 C{update_instructions}.""" 541 for psi in update_instructions: 542 psi.apply(counter_values)
543
544 - def __hash__ (self):
545 return hash(self.__counterCondition) ^ hash(self.__doIncrement)
546
547 - def __eq__ (self, other):
548 return (other is not None) \ 549 and (self.__doIncrement == other.__doIncrement) \ 550 and (self.__counterCondition == other.__counterCondition)
551
552 - def __ne__ (self, other):
553 return not self.__eq__(other)
554
555 - def __str__ (self):
556 return '%s %s' % (self.__doIncrement and 'inc' or 'reset', self.__counterCondition)
557
558 -class Transition (object):
559 """Representation of a FAC state transition.""" 560 561 __destination = None
562 - def __get_destination (self):
563 """The transition destination state.""" 564 return self.__destination
565 destination = property(__get_destination) 566 567 __updateInstructions = None
568 - def __get_updateInstructions (self):
569 """The set of counter updates that are applied when the transition is taken.""" 570 return self.__updateInstructions
571 updateInstructions = property(__get_updateInstructions) 572 573 __nextTransition = None
574 - def __get_nextTransition (self):
575 """The next transition to apply in this chain. 576 577 C{None} if this is the last transition in the chain.""" 578 return self.__nextTransition
579 nextTransition = property(__get_nextTransition) 580 581 __layerLink = None 597 layerLink = property(__get_layerLink) 598
599 - def __init__ (self, destination, update_instructions, layer_link=None):
600 """Create a transition to a state. 601 602 @param destination: the state into which the transition is 603 made 604 605 @param update_instructions: A iterable of L{UpdateInstruction}s 606 denoting the changes that must be made to counters as a 607 consequence of taking the transition. 608 609 @keyword layer_link: The value for L{layerLink}.""" 610 self.__destination = destination 611 if not isinstance(update_instructions, list): 612 update_instructions = list(update_instructions) 613 self.__updateInstructions = update_instructions 614 self.__layerLink = layer_link
615
616 - def consumingState (self):
617 """Return the state in this transition chain that must match a symbol.""" 618 619 # Transitions to a state with subautomata never consume anything 620 if self.__destination.subAutomata is not None: 621 if not self.__nextTransition: 622 return None 623 return self.__nextTransition.consumingState() 624 # I don't think there should be subsequent transitions 625 assert self.__nextTransition is None 626 return self.__destination
627
628 - def consumedSymbol (self):
629 """Return the L{symbol<State.symbol>} of the L{consumingState}.""" 630 return self.consumingState().symbol
631
632 - def satisfiedBy (self, configuration):
633 """Check the transition update instructions against 634 configuration counter values. 635 636 This implementation follows layer changes, updating the 637 configuration used as counter value source as necessary. 638 639 @param configuration: A L{Configuration} instance containing 640 counter data against which update instruction satisfaction is 641 checked. 642 643 @return: C{True} iff all update instructions along the 644 transition chain are satisfied by their relevant 645 configuration.""" 646 # If we're entering an automaton, we know no subsequent 647 # transitions have update instructions 648 if isinstance(self.__layerLink, Automaton): 649 return True 650 # If we're leaving an automaton, switch to the configuration 651 # that is relevant to the destination of the transition. 652 if isinstance(self.__layerLink, Configuration): 653 configuration = self.__layerLink 654 assert self.destination.automaton == configuration.automaton 655 # Blow chunks if the configuration doesn't satisfy the transition 656 if not configuration.satisfies(self): 657 return False 658 # Otherwise try the next transition, or succeed if there isn't one 659 if self.__nextTransition: 660 return self.__nextTransition.satisfiedBy(configuration) 661 return True
662
663 - def apply (self, configuration, clone_map=None):
664 """Apply the transitition to a configuration. 665 666 This updates the configuration counter values based on the 667 update instructions, and sets the new configuration state. 668 669 @note: If the transition involves leaving a sub-automaton or 670 creating a new sub-automaton, the returned configuration 671 structure will be different from the one passed in. You 672 should invoke this as:: 673 674 cfg = transition.apply(cfg) 675 676 @param configuration: A L{Configuration} of an executing automaton 677 678 @param clone_map: A map from L{Configuration} to 679 L{Configuration} reflecting the replacements made when the 680 configuration for which the transition was calculated was 681 subsequently cloned into the C{configuration} passed into this 682 method. This is only necessary when the transition includes 683 layer transitions. 684 685 @return: The resulting configuration 686 """ 687 layer_link = self.__layerLink 688 if isinstance(layer_link, Configuration): 689 if clone_map is not None: 690 layer_link = clone_map[layer_link] 691 configuration = layer_link.leaveAutomaton(configuration) 692 elif isinstance(layer_link, Automaton): 693 configuration = configuration.enterAutomaton(layer_link) 694 UpdateInstruction.Apply(self.updateInstructions, configuration._get_counterValues()) 695 configuration._set_state(self.destination, layer_link is None) 696 if self.__nextTransition is None: 697 return configuration 698 return self.__nextTransition.apply(configuration, clone_map)
699
700 - def chainTo (self, next_transition):
701 """Duplicate the state and chain the duplicate to a successor 702 transition. 703 704 This returns a new transition which applies the operation for 705 this transition, then proceeds to apply the next transition in 706 the chain. 707 708 @note: The node that is invoking this must not have successor 709 transitions. 710 711 @param next_transition: A L{Transition} node describing a 712 subsequent transition. 713 714 @return: a clone of this node, augmented with a link to 715 C{next_transition}.""" 716 assert not self.__nextTransition 717 head = type(self)(self.__destination, self.__updateInstructions, layer_link=self.__layerLink) 718 head.__nextTransition = next_transition 719 return head
720
722 """Replicate the transition as a layer link into its automaton. 723 724 This is used on initial transitions into sub-automata where a 725 sub-configuration must be created and recorded.""" 726 assert self.__layerLink is None 727 assert self.__nextTransition is None 728 head = type(self)(self.__destination, self.__updateInstructions) 729 head.__layerLink = self.__destination.automaton 730 return head
731
732 - def __hash__ (self):
733 rv = hash(self.__destination) 734 for ui in self.__updateInstructions: 735 rv ^= hash(ui) 736 return rv ^ hash(self.__nextTransition) ^ hash(self.__layerLink)
737
738 - def __eq__ (self, other):
739 return (other is not None) \ 740 and (self.__destination == other.__destination) \ 741 and (self.__updateInstructions == other.__updateInstructions) \ 742 and (self.__nextTransition == other.__nextTransition) \ 743 and (self.__layerLink == other.__layerLink)
744
745 - def __ne__ (self, other):
746 return not self.__eq__(other)
747
748 - def __str__ (self):
749 rv = [] 750 if isinstance(self.__layerLink, Configuration): 751 rv.append('from A%x ' % (id(self.__layerLink.automaton),)) 752 elif isinstance(self.__layerLink, Automaton): 753 rv.append('in A%x ' % (id(self.__layerLink))) 754 rv.append('enter %s ' % (self.destination,)) 755 if (self.consumingState() == self.destination): 756 rv.append('via %s ' % (self.destination.symbol,)) 757 rv.append('with %s' % (' ; '.join(map(str, self.updateInstructions)),)) 758 if self.__nextTransition: 759 rv.append("\n\tthen ") 760 rv.append(str(self.__nextTransition)) 761 return ''.join(rv)
762
763 -class Configuration_ABC (object):
764 """Base class for something that represents an L{Automaton} in 765 execution. 766 767 For deterministic automata, this is generally a L{Configuration} 768 which records the current automaton state along with its counter 769 values. 770 771 For non-deterministic automata, this is a L{MultiConfiguration} 772 which records a set of L{Configuration}s.""" 773
774 - def acceptableSymbols (self):
775 """Return the acceptable L{Symbol}s given the current 776 configuration. 777 778 This method extracts the symbol from all candidate transitions 779 that are permitted based on the current counter values. 780 Because transitions are presented in a preferred order, the 781 symbols are as well.""" 782 raise NotImplementedError('%s.acceptableSymbols' % (type(self).__name__,))
783
784 - def step (self, symbol):
785 """Execute an automaton transition using the given symbol. 786 787 @param symbol: A symbol from the alphabet of the automaton's 788 language. This is a Python value that should be accepted by 789 the L{SymbolMatch_mixin.match} method of a L{State.symbol}. 790 It is not a L{Symbol} instance. 791 792 @return: The new configuration resulting from the step. 793 794 @raises AutomatonStepError: L{UnrecognizedSymbolError} 795 when no transition compatible with C{symbol} is available, and 796 L{NondeterministicSymbolError} if C{symbol} admits multiple 797 transitions and the subclass does not support 798 non-deterministic steps (see L{MultiConfiguration}). 799 800 @warning: If the step entered or left a sub-automaton the 801 return value will not be the configuration that was used to 802 execute the step. The proper pattern for using this method 803 is:: 804 805 cfg = cfg.step(sym) 806 807 """ 808 raise NotImplementedError('%s.step' % (type(self).__name__,))
809
810 -class Configuration (Configuration_ABC):
811 """The state of an L{Automaton} in execution. 812 813 This combines a state node of the automaton with a set of counter 814 values.""" 815 816 __state = None
817 - def __get_state (self):
818 """The state of the configuration. 819 820 This is C{None} to indicate an initial state, or one of the underlying automaton's states.""" 821 return self.__state
822 - def _set_state (self, state, is_layer_change):
823 """Internal state transition interface. 824 825 @param state: the new destination state 826 827 @param is_layer_change: C{True} iff the transition inducing 828 the state change involves a layer change. 829 """ 830 831 # If the new state and old state are the same, the layer 832 # change has no effect (we're probably leaving a 833 # subconfiguration, and we want to keep the current set of 834 # sub-automata.) 835 if state == self.__state: 836 return 837 838 # Otherwise, discard any unprocessed automata in the former 839 # state, set the state, and if the new state has subautomata 840 # create a set holding them so they can be processed. 841 if is_layer_change: 842 self.__subConfiguration = None 843 self.__subAutomata = None 844 self.__state = state 845 if is_layer_change and (state.subAutomata is not None): 846 assert self.__subAutomata is None 847 self.__subAutomata = list(state.subAutomata)
848 state = property(__get_state) 849 850 __counterValues = None 851 """The values of the counters. 852 853 This is a map from the CounterCondition instances of the 854 underlying automaton to integer values."""
855 - def _get_counterValues (self):
856 return self.__counterValues
857 858 __automaton = None
859 - def __get_automaton (self):
860 return self.__automaton
861 automaton = property(__get_automaton) 862 863 __subConfiguration = None
864 - def __get_subConfiguration (self):
865 """Reference to configuration being executed in a sub-automaton. 866 867 C{None} if no sub-automaton is active, else a reference to a 868 configuration that is being executed in a sub-automaton. 869 870 Sub-configurations are used to match sub-terms in an 871 L{unordered catenation<All>} term. A configuration may have 872 at most one sub-configuration at a time, and the configuration 873 will be removed and possibly replaced when the term being 874 processed completes.""" 875 return self.__subConfiguration
876 subConfiguration = property(__get_subConfiguration) 877 878 __superConfiguration = None
879 - def __get_superConfiguration (self):
880 """Reference to the configuration for which this is a 881 sub-configuration. 882 883 C{None} if no super-automaton is active, else a reference to a 884 configuration that is being executed in a super-automaton. 885 886 The super-configuration relation persists for the lifetime of 887 the configuration.""" 888 return self.__superConfiguration
889 superConfiguration = property(__get_superConfiguration) 890 891 __subAutomata = None
892 - def __get_subAutomata (self):
893 """A set of automata that must be satisfied before the current state can complete. 894 895 This is used in unordered catenation. Each sub-automaton 896 represents a term in the catenation. When the configuration 897 enters a state with sub-automata, a set containing references 898 to those automata is assigned to this attribute. 899 Subsequently, until all automata in the state are satisfied, 900 transitions can only occur within an active sub-automaton, out 901 of the active sub-automaton if it is in an accepting state, 902 and into a new sub-automaton if no sub-automaton is active. 903 """ 904 return self.__subAutomata
905 - def _set_subAutomata (self, automata):
906 self.__subAutomata = list(automata)
907 subAutomata = property(__get_subAutomata) 908
910 """Create a transition back to the containing configuration. 911 912 This is done when a configuration is in an accepting state and 913 there are candidate transitions to other states that must be 914 considered. The transition does not consume a symbol.""" 915 assert self.__superConfiguration is not None 916 return Transition(self.__superConfiguration.__state, set(), layer_link=self.__superConfiguration)
917
918 - def leaveAutomaton (self, sub_configuration):
919 """Execute steps to leave a sub-automaton. 920 921 @param sub_configuration: The configuration associated with 922 the automata that has completed. 923 924 @return: C{self}""" 925 assert sub_configuration.__superConfiguration == self 926 self.__subConfiguration = None 927 return self
928
929 - def enterAutomaton (self, automaton):
930 """Execute steps to enter a new automaton. 931 932 The new automaton is removed from the set of remaining 933 automata for the current state, and a new configuration 934 created. No transition is made in that new configuration. 935 936 @param automaton: The automaton to be entered 937 938 @return: The configuration that executes the new automaton as 939 a sub-configuration of C{self}.""" 940 assert self.__subConfiguration is None 941 assert self.__subAutomata is not None 942 self.__subAutomata.remove(automaton) 943 self.__subConfiguration = Configuration(automaton) 944 self.__subConfiguration.__superConfiguration = self 945 return self.__subConfiguration
946
947 - def satisfies (self, transition):
949
950 - def reset (self):
951 fac = self.__automaton 952 self.__state = None 953 self.__counterValues = dict(zip(fac.counterConditions, len(fac.counterConditions) * (1,))) 954 self.__subConfiguration = None 955 self.__subAutomata = None
956
957 - def candidateTransitions (self, symbol=None):
958 """Return list of viable transitions on C{symbol} 959 960 The transitions that are structurally permitted from this 961 state, in order, filtering out those transitions where the 962 update instruction is not satisfied by the configuration 963 counter values and optionally those for which the symbol does 964 not match. 965 966 @param symbol: A symbol through which a transition from this 967 state is intended. A value of C{None} indicates that the set 968 of transitions should ignore the symbol; candidates are still 969 filtered based on the counter state of the configuration. 970 971 @return: A list of L{Transition} instances permitted from the 972 current configuration. If C{symbol} is not C{None}, 973 transitions that would not accept the symbol are excluded. 974 Any transition that would require an unsatisfied counter 975 update is also excluded. Non-deterministic automata may 976 result in a lits with multiple members. """ 977 978 fac = self.__automaton 979 transitions = [] 980 if symbol is None: 981 match_filter = lambda _xit: True 982 else: 983 match_filter = lambda _xit: _xit.consumingState().match(symbol) 984 update_filter = lambda _xit: _xit.satisfiedBy(self) 985 986 if self.__state is None: 987 # Special-case the initial entry to the topmost configuration 988 transitions.extend(fac.initialTransitions) 989 elif (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 990 # If there's an active subconfiguration that is not in an 991 # accepting state, we can't do anything at this level. 992 pass 993 else: 994 # Normally include transitions at this level, but in some 995 # cases they are not permitted. 996 include_local = True 997 if self.__subAutomata: 998 # Disallow transitions in this level if there are 999 # subautomata that require symbols before a transition 1000 # out of this node is allowed. 1001 (include_local, sub_initial) = self.__state.subAutomataInitialTransitions(self.__subAutomata) 1002 transitions.extend(map(lambda _xit: _xit.makeEnterAutomatonTransition(), sub_initial)) 1003 if include_local: 1004 # Transitions within this layer 1005 for xit in filter(update_filter, self.__state.transitionSet): 1006 if xit.consumingState() is not None: 1007 transitions.append(xit) 1008 else: 1009 # The transition did not consume a symbol, so we have to find 1010 # one that does, from among the subautomata of the destination. 1011 # We do not care if the destination is nullable; alternatives 1012 # to it are already being handled with different transitions. 1013 (_, sub_initial) = xit.destination.subAutomataInitialTransitions() 1014 transitions.extend(map(lambda _xit: xit.chainTo(_xit.makeEnterAutomatonTransition()), sub_initial)) 1015 if (self.__superConfiguration is not None) and self.isAccepting(): 1016 # Transitions that leave this automaton 1017 lxit = self.makeLeaveAutomatonTransition() 1018 supxit = self.__superConfiguration.candidateTransitions(symbol) 1019 transitions.extend(map(lambda _sx: lxit.chainTo(_sx), supxit)) 1020 assert len(frozenset(transitions)) == len(transitions) 1021 return filter(update_filter, filter(match_filter, transitions))
1022
1023 - def acceptableSymbols (self):
1024 return [ _xit.consumedSymbol() for _xit in self.candidateTransitions()]
1025
1026 - def step (self, symbol):
1027 transitions = self.candidateTransitions(symbol) 1028 if 0 == len(transitions): 1029 raise UnrecognizedSymbolError(self, symbol) 1030 if 1 < len(transitions): 1031 raise NondeterministicSymbolError(self, symbol) 1032 return transitions[0].apply(self)
1033
1034 - def isInitial (self):
1035 """Return C{True} iff no transitions have ever been made.""" 1036 return self.__state is None
1037
1038 - def isAccepting (self):
1039 """Return C{True} iff the automaton is in an accepting state.""" 1040 if self.__state is not None: 1041 # Any active sub-configuration must be accepting 1042 if (self.__subConfiguration is not None) and not self.__subConfiguration.isAccepting(): 1043 return False 1044 # Any unprocessed sub-automata must be nullable 1045 if self.__subAutomata is not None: 1046 if not functools.reduce(operator.and_, map(lambda _sa: _sa.nullable, self.__subAutomata), True): 1047 return False 1048 # This state must be accepting 1049 return self.__state.isAccepting(self.__counterValues) 1050 # Accepting without any action requires nullable automaton 1051 return self.__automaton.nullable
1052
1053 - def __init__ (self, automaton, super_configuration=None):
1054 self.__automaton = automaton 1055 self.__superConfiguration = super_configuration 1056 self.reset()
1057
1058 - def clone (self, clone_map=None):
1059 """Clone a configuration and its descendents. 1060 1061 This is used for parallel execution where a configuration has 1062 multiple candidate transitions and must follow all of them. 1063 It clones the entire chain of configurations through 1064 multiple layers. 1065 1066 @param clone_map: Optional map into which the translation from 1067 the original configuration object to the corresponding cloned 1068 configuration object can be reconstructed, e.g. when applying 1069 a transition that includes automata exits referencing 1070 superconfigurations from the original configuration. 1071 """ 1072 if clone_map is None: 1073 clone_map = {} 1074 root = self 1075 while root.__superConfiguration is not None: 1076 root = root.__superConfiguration 1077 root = root._clone(clone_map, None) 1078 return clone_map.get(self)
1079
1080 - def _clone (self, clone_map, super_configuration):
1081 assert not self in clone_map 1082 other = type(self)(self.__automaton) 1083 clone_map[self] = other 1084 other.__state = self.__state 1085 other.__counterValues = self.__counterValues.copy() 1086 other.__superConfiguration = super_configuration 1087 if self.__subAutomata is not None: 1088 other.__subAutomata = self.__subAutomata[:] 1089 if self.__subConfiguration: 1090 other.__subConfiguration = self.__subConfiguration._clone(clone_map, other) 1091 return other
1092
1093 - def __str__ (self):
1094 return '%s: %s' % (self.__state, ' ; '.join([ '%s=%u' % (_c,_v) for (_c,_v) in self.__counterValues.iteritems()]))
1095
1096 -class MultiConfiguration (Configuration_ABC):
1097 """Support parallel execution of state machine. 1098 1099 This holds a set of configurations, and executes each transition 1100 on each one. Configurations which fail to accept a step are 1101 silently dropped; only if this results in no remaining 1102 configurations will L{UnrecognizedSymbolError} be raised. If a 1103 step admits multiple valid transitions, a configuration is added 1104 for each one. 1105 1106 See L{pyxb.binding.content.AutomatonConfiguration} for an 1107 alternative solution which holds actions associated with the 1108 transition until the non-determinism is resolved.""" 1109 1110 __configurations = None 1111
1112 - def __init__ (self, configuration):
1114
1115 - def acceptableSymbols (self):
1116 acceptable = [] 1117 for cfg in self.__configurations: 1118 acceptable.extend(cfg.acceptableSymbols()) 1119 return acceptable
1120
1121 - def step (self, symbol):
1122 next_configs = [] 1123 for cfg in self.__configurations: 1124 transitions = cfg.candidateTransitions(symbol) 1125 if 0 == len(transitions): 1126 pass 1127 elif 1 == len(transitions): 1128 next_configs.append(transitions[0].apply(cfg)) 1129 else: 1130 for transition in transitions: 1131 clone_map = {} 1132 ccfg = cfg.clone(clone_map) 1133 next_configs.append(transition.apply(ccfg, clone_map)) 1134 if 0 == len(next_configs): 1135 raise UnrecognizedSymbolError(self, symbol) 1136 assert len(frozenset(next_configs)) == len(next_configs) 1137 self.__configurations = next_configs 1138 return self
1139
1140 - def acceptingConfigurations (self):
1141 """Return the set of configurations that are in an accepting state. 1142 1143 Note that some of the configurations may be within a 1144 sub-automaton; their presence in the return value is because 1145 the root configuration is also accepting.""" 1146 accepting = [] 1147 for cfg in self.__configurations: 1148 rcfg = cfg 1149 # Rule out configurations that are accepting within their 1150 # automaton, but not in the containing automaton. 1151 while rcfg.superConfiguration is not None: 1152 rcfg = rcfg.superConfiguration 1153 if rcfg.isAccepting(): 1154 accepting.append(cfg) 1155 return accepting
1156
1157 -class Automaton (object):
1158 """Representation of a Finite Automaton with Counters. 1159 1160 This has all the standard FAC elements, plus links to other 1161 states/automata as required to support the nested automata 1162 construct used for matching unordered catenation terms.""" 1163 __states = None
1164 - def __get_states (self):
1165 """The set of L{State}s in the automaton. 1166 1167 These correspond essentially to marked symbols in the original 1168 regular expression, or L{element 1169 declarations<pyxb.xmlschema.structures.ElementDeclaration>} in 1170 an XML schema. 1171 1172 @note: These are conceptually a set and are stored that way. 1173 When an L{Automaton} is constructed the incoming states should 1174 be passed as a list so the calculated initial transitions are 1175 executed in a deterministic order.""" 1176 return self.__states
1177 states = property(__get_states) 1178 1179 __counterConditions = None
1180 - def __get_counterConditions (self):
1181 """The set of L{CounterCondition}s in the automaton. 1182 1183 These are marked positions in the regular expression, or 1184 L{particles<pyxb.xmlschema.structures.Particle>} in an XML 1185 schema, paired with their occurrence constraints.""" 1186 return self.__counterConditions
1187 counterConditions = property(__get_counterConditions) 1188 1189 __nullable = None
1190 - def __get_nullable (self):
1191 """C{True} iff the automaton accepts the empty string.""" 1192 return self.__nullable
1193 nullable = property(__get_nullable) 1194 1195 __initialTransitions = None
1196 - def __get_initialTransitions (self):
1197 """The set of transitions that may be made to enter the automaton. 1198 1199 These are full transitions, including chains into subautomata 1200 if an initial state represents a node with sub-automata. 1201 1202 @note: As with L{State.transitionSet}, the set is represented 1203 as a list to preserve priority when resolving 1204 non-deterministic matches.""" 1205 return self.__initialTransitions
1206 initialTransitions = property(__get_initialTransitions) 1207 1208 __containingState = None
1209 - def __get_containingState (self):
1210 """The L{State} instance for which this is a sub-automaton. 1211 1212 C{None} if this is not a sub-automaton.""" 1213 return self.__containingState
1214 containingState = property(__get_containingState) 1215 1216 __finalStates = None
1217 - def __get_finalStates (self):
1218 """The set of L{State} members which can terminate a match.""" 1219 return self.__finalStates
1220 finalStates = property(__get_finalStates) 1221
1222 - def __init__ (self, states, counter_conditions, nullable, containing_state=None):
1223 self.__states = frozenset(states) 1224 for st in self.__states: 1225 st._set_automaton(self) 1226 self.__counterConditions = frozenset(counter_conditions) 1227 self.__nullable = nullable 1228 self.__containingState = containing_state 1229 xit = [] 1230 fnl = set() 1231 # Iterate over states, not self.__states, in case the input was a list. 1232 # This way we preserve the priority for initial transitions. 1233 for s in states: 1234 if s.isInitial: 1235 xit.extend(s.automatonEntryTransitions) 1236 if s.finalUpdate is not None: 1237 fnl.add(s) 1238 self.__initialTransitions = xit 1239 self.__finalStates = frozenset(fnl)
1240
1241 - def newConfiguration (self):
1242 """Return a new L{Configuration} instance for this automaton.""" 1243 return Configuration(self)
1244
1245 - def __str__ (self):
1246 rv = [] 1247 rv.append('sigma = %s' % (' '.join(map(lambda _s: str(_s.symbol), self.__states)))) 1248 rv.append('states = %s' % (' '.join(map(str, self.__states)))) 1249 for s in self.__states: 1250 if s.subAutomata is not None: 1251 for i in xrange(len(s.subAutomata)): 1252 rv.append('SA %s.%u is %x:\n ' % (str(s), i, id(s.subAutomata[i])) + '\n '.join(str(s.subAutomata[i]).split('\n'))) 1253 rv.append('counters = %s' % (' '.join(map(str, self.__counterConditions)))) 1254 rv.append('initial = %s' % (' ; '.join([ '%s on %s' % (_s, _s.symbol) for _s in filter(lambda _s: _s.isInitial, self.__states)]))) 1255 rv.append('initial transitions:\n%s' % ('\n'.join(map(str, self.initialTransitions)))) 1256 rv.append('States:') 1257 for s in self.__states: 1258 rv.append('%s: %s' % (s, s._facText())) 1259 return '\n'.join(rv)
1260
1261 -class Node (object):
1262 """Abstract class for any node in the term tree. 1263 1264 In its original form a B{position} (C{pos}) is a tuple of 1265 non-negative integers comprising a path from a node in the term 1266 tree. It identifies a node in the tree. After the FAC has been 1267 constructed, only positions that are leaf nodes in the term tree 1268 remain, and the corresponding symbol value (Python instance) is 1269 used as the position. 1270 1271 An B{update instruction} (C{psi}) is a map from positions to 1272 either L{Node.RESET} or L{Node.INCREMENT}. It identifies actions 1273 to be taken on the counter states corresponding to the positions 1274 in its domain. 1275 1276 A B{transition} is a pair containing a position and an update 1277 instruction. It identifies a potential next node in the state and 1278 the updates that are to be performed if the transition is taken. 1279 1280 A B{follow value} is a map from a position to a set of transitions 1281 that may originate from the pos. This set is represented as a 1282 Python list since update instructions are dicts and cannot be 1283 hashed. 1284 """ 1285 1286 _Precedence = None 1287 """An integral value used for parenthesizing expressions. 1288 1289 A subterm that has a precedence less than that of its containing 1290 term must be enclosed in parentheses when forming a text 1291 expression representing the containing term.""" 1292 1293 RESET = False 1294 """An arbitrary value representing reset of a counter.""" 1295 1296 INCREMENT = True 1297 """An arbitrary value representing increment of a counter.""" 1298
1299 - def __init__ (self, **kw):
1300 """Create a FAC term-tree node. 1301 1302 @keyword metadata: Any application-specific metadata retained in 1303 the term tree for transfer to the resulting automaton.""" 1304 self.__metadata = kw.get('metadata')
1305
1306 - def clone (self, *args, **kw):
1307 """Create a deep copy of the node. 1308 1309 All term-tree--related attributes and properties are replaced 1310 with deep clones. Other attributes are preserved. 1311 1312 @param args: A tuple of arguments to be passed to the instance 1313 constructor. 1314 1315 @param kw: A dict of keywords to be passed to the instance 1316 constructor. 1317 1318 @note: Subclasses should pre-extend this method to augment the 1319 C{args} and C{kw} parameters as necessary to match the 1320 expectations of the C{__init__} method of the class being 1321 cloned.""" 1322 kw.setdefault('metadata', self.metadata) 1323 return type(self)(*args, **kw)
1324 1325 __metadata = None
1326 - def __get_metadata (self):
1327 """Application-specific metadata provided during construction.""" 1328 return self.__metadata
1329 metadata = property(__get_metadata) 1330 1331 __first = None
1332 - def __get_first (self):
1333 """The I{first} set for the node. 1334 1335 This is the set of positions leading to symbols that can 1336 appear first in a string matched by an execution starting at 1337 the node.""" 1338 if self.__first is None: 1339 self.__first = frozenset(self._first()) 1340 return self.__first
1341 first = property(__get_first) 1342
1343 - def _first (self):
1344 """Abstract method that defines L{first} for the subclass. 1345 1346 The return value should be an iterable of tuples of integers 1347 denoting paths from this node through the term tree to a 1348 symbol.""" 1349 raise NotImplementedError('%s.first' % (type(self).__name__,))
1350 1351 __last = None
1352 - def __get_last (self):
1353 """The I{last} set for the node. 1354 1355 This is the set of positions leading to symbols that can 1356 appear last in a string matched by an execution starting at 1357 the node.""" 1358 if self.__last is None: 1359 self.__last = frozenset(self._last()) 1360 return self.__last
1361 last = property(__get_last) 1362
1363 - def _last (self):
1364 """Abstract method that defines L{last} for the subclass. 1365 1366 The return value should be an iterable of tuples of integers 1367 denoting paths from this node through the term tree to a 1368 symbol.""" 1369 raise NotImplementedError('%s.last' % (type(self).__name__,))
1370 1371 __nullable = None
1372 - def __get_nullable (self):
1373 """C{True} iff the empty string is accepted by this node.""" 1374 if self.__nullable is None: 1375 self.__nullable = self._nullable() 1376 return self.__nullable
1377 nullable = property(__get_nullable) 1378
1379 - def _nullable (self):
1380 """Abstract method that defines L{nullable} for the subclass. 1381 1382 The return value should be C{True} or C{False}.""" 1383 raise NotImplementedError('%s.nullable' % (type(self).__name__,))
1384 1385 __follow = None
1386 - def __get_follow (self):
1387 """The I{follow} map for the node.""" 1388 if self.__follow is None: 1389 self.__follow = self._follow() 1390 return self.__follow
1391 follow = property(__get_follow) 1392
1393 - def _follow (self):
1394 """Abstract method that defines L{follow} for the subclass. 1395 1396 The return value should be a map from tuples of integers (positions) 1397 to a list of transitions, where a transition is a position and 1398 an update instruction.""" 1399 raise NotImplementedError('%s.follow' % (type(self).__name__,))
1400
1401 - def reset (self):
1402 """Reset any term-tree state associated with the node. 1403 1404 Any change to the structure of the term tree in which the node 1405 appears invalidates memoized first/follow sets and related 1406 information. This method clears all that data so it can be 1407 recalculated. It does not clear the L{metadata} link, or any 1408 existing structural data.""" 1409 self.__first = None 1410 self.__last = None 1411 self.__nullable = None 1412 self.__follow = None 1413 self.__counterPositions = None
1414
1415 - def walkTermTree (self, pre, post, arg):
1416 """Utility function for term tree processing. 1417 1418 @param pre: a callable that, unless C{None}, is invoked at 1419 each node C{n} with parameters C{n}, C{pos}, and C{arg}, where 1420 C{pos} is the tuple of integers identifying the path from the 1421 node at on which this method was invoked to the node being 1422 processed. The invocation occurs before processing any 1423 subordinate nodes. 1424 1425 @param post: as with C{pre} but invocation occurs after 1426 processing any subordinate nodes. 1427 1428 @param arg: a value passed to invocations of C{pre} and 1429 C{post}.""" 1430 self._walkTermTree((), pre, post, arg)
1431
1432 - def _walkTermTree (self, position, pre, post, arg):
1433 """Abstract method implementing L{walkTermTree} for the subclass.""" 1434 raise NotImplementedError('%s.walkTermTree' % (type(self).__name__,))
1435 1436 __posNodeMap = None
1437 - def __get_posNodeMap (self):
1438 """A map from positions to nodes in the term tree.""" 1439 if self.__posNodeMap is None: 1440 pnm = { } 1441 self.walkTermTree(lambda _n,_p,_a: _a.setdefault(_p, _n), None, pnm) 1442 self.__posNodeMap = pnm 1443 return self.__posNodeMap
1444 posNodeMap = property(__get_posNodeMap) 1445 1446 __nodePosMap = None
1447 - def __get_nodePosMap (self):
1448 """A map from nodes to their position in the term tree.""" 1449 if self.__nodePosMap is None: 1450 npm = {} 1451 for (p,n) in self.posNodeMap.iteritems(): 1452 npm[n] = p 1453 self.__nodePosMap = npm 1454 return self.__nodePosMap
1455 nodePosMap = property(__get_nodePosMap) 1456 1457 @classmethod
1458 - def _PosConcatPosSet (cls, pos, pos_set):
1459 """Implement definition 11.1 in B{HOV09}.""" 1460 return frozenset([ pos + _mp for _mp in pos_set ])
1461 1462 @classmethod
1463 - def _PosConcatUpdateInstruction (cls, pos, psi):
1464 """Implement definition 11.2 in B{HOV09}""" 1465 rv = {} 1466 for (q, v) in psi.iteritems(): 1467 rv[pos + q] = v 1468 return rv
1469 1470 @classmethod
1471 - def _PosConcatTransitionSet (cls, pos, transition_set):
1472 """Implement definition 11.3 in B{HOV09}""" 1473 ts = [] 1474 for (q, psi) in transition_set: 1475 ts.append((pos + q, cls._PosConcatUpdateInstruction(pos, psi) )) 1476 return ts
1477
1478 - def __resetAndValidate (self, node, pos, visited_nodes):
1479 if node in visited_nodes: 1480 raise InvalidTermTreeError(self, node) 1481 node.reset() 1482 visited_nodes.add(node)
1483
1484 - def buildAutomaton (self, state_ctor=State, ctr_cond_ctor=CounterCondition, containing_state=None):
1485 # Validate that the term tree is in fact a tree. A DAG does 1486 # not work. If the tree had cycles, the automaton build 1487 # wouldn't even return. 1488 self.walkTermTree(self.__resetAndValidate, None, set()) 1489 1490 counter_map = { } 1491 for pos in self.counterPositions: 1492 nci = self.posNodeMap.get(pos) 1493 assert isinstance(nci, NumericalConstraint) 1494 assert nci not in counter_map 1495 counter_map[pos] = ctr_cond_ctor(nci.min, nci.max, nci.metadata) 1496 counters = counter_map.values() 1497 1498 state_map = { } 1499 for pos in self.follow.iterkeys(): 1500 sym = self.posNodeMap.get(pos) 1501 assert isinstance(sym, LeafNode) 1502 assert sym not in state_map 1503 1504 # The state may be an initial state if it is in the first 1505 # set for the root of the term tree. 1506 is_initial = pos in self.first 1507 1508 # The state may be a final state if it is nullable or is 1509 # in the last set of the term tree. 1510 final_update = None 1511 if (() == pos and sym.nullable) or (pos in self.last): 1512 # Acceptance is further constrained by the counter 1513 # values satisfying an update rule that would reset 1514 # all counters that are relevant at the state. 1515 final_update = set() 1516 for nci in map(counter_map.get, self.counterSubPositions(pos)): 1517 final_update.add(UpdateInstruction(nci, False)) 1518 state_map[pos] = state_ctor(sym.metadata, is_initial=is_initial, final_update=final_update, is_unordered_catenation=isinstance(sym, All)) 1519 if isinstance(sym, All): 1520 state_map[pos]._set_subAutomata(*map(lambda _s: _s.buildAutomaton(state_ctor, ctr_cond_ctor, containing_state=state_map[pos]), sym.terms)) 1521 states = state_map.values() 1522 1523 for (spos, transition_set) in self.follow.iteritems(): 1524 src = state_map[spos] 1525 phi = [] 1526 for (dpos, psi) in transition_set: 1527 dst = state_map[dpos] 1528 uiset = set() 1529 for (counter, action) in psi.iteritems(): 1530 uiset.add(UpdateInstruction(counter_map[counter], self.INCREMENT == action)) 1531 phi.append(Transition(dst, uiset)) 1532 src._set_transitionSet(phi) 1533 1534 return Automaton(states, counters, self.nullable, containing_state=containing_state)
1535 1536 __counterPositions = None
1537 - def __get_counterPositions (self):
1538 """Implement definition 13.1 from B{HOV09}. 1539 1540 The return value is the set of all positions leading to 1541 L{NumericalConstraint} nodes for which either the minimum 1542 value is not 1 or the maximum value is not unbounded.""" 1543 if self.__counterPositions is None: 1544 cpos = [] 1545 self.walkTermTree(lambda _n,_p,_a: \ 1546 isinstance(_n, NumericalConstraint) \ 1547 and ((1 != _n.min) \ 1548 or (_n.max is not None)) \ 1549 and _a.append(_p), 1550 None, cpos) 1551 self.__counterPositions = frozenset(cpos) 1552 return self.__counterPositions
1553 counterPositions = property(__get_counterPositions) 1554
1555 - def counterSubPositions (self, pos):
1556 """Implement definition 13.2 from B{HOV09}. 1557 1558 This is the subset of L{counterPositions} that occur along the 1559 path to C{pos}.""" 1560 rv = set() 1561 for cpos in self.counterPositions: 1562 if cpos == pos[:len(cpos)]: 1563 rv.add(cpos) 1564 return frozenset(rv)
1565
1566 - def _facToString (self):
1567 """Obtain a description of the FAC in text format. 1568 1569 This is a diagnostic tool, returning first, last, and follow 1570 maps using positions.""" 1571 rv = [] 1572 rv.append('r\t= %s' % (str(self),)) 1573 states = list(self.follow.iterkeys()) 1574 rv.append('sym(r)\t= %s' % (' '.join(map(str, map(self.posNodeMap.get, states))))) 1575 rv.append('first(r)\t= %s' % (' '.join(map(str, self.first)))) 1576 rv.append('last(r)\t= %s' % (' '.join(map(str, self.last)))) 1577 rv.append('C\t= %s' % (' '.join(map(str, self.counterPositions)))) 1578 for pos in self.first: 1579 rv.append('qI(%s) -> %s' % (self.posNodeMap[pos].metadata, str(pos))) 1580 for spos in states: 1581 for (dpos, transition_set) in self.follow[spos]: 1582 dst = self.posNodeMap[dpos] 1583 uv = [] 1584 for (c, u) in transition_set.iteritems(): 1585 uv.append('%s %s' % (u == self.INCREMENT and "inc" or "rst", str(c))) 1586 rv.append('%s -%s-> %s ; %s' % (str(spos), dst.metadata, str(dpos), ' ; '.join(uv))) 1587 return '\n'.join(rv)
1588
1589 -class MultiTermNode (Node):
1590 """Intermediary for nodes that have multiple child nodes.""" 1591 1592 __terms = None
1593 - def __get_terms (self):
1594 """The set of subordinate terms of the current node.""" 1595 return self.__terms
1596 terms = property(__get_terms) 1597
1598 - def __init__ (self, *terms, **kw):
1599 """Term that collects an ordered sequence of terms. 1600 1601 The terms are provided as arguments. All must be instances of 1602 a subclass of L{Node}.""" 1603 super(MultiTermNode, self).__init__(**kw) 1604 self.__terms = terms
1605
1606 - def clone (self):
1607 cterms = map(lambda _s: _s.clone(), self.__terms) 1608 return super(MultiTermNode, self).clone(*cterms)
1609
1610 - def _walkTermTree (self, position, pre, post, arg):
1611 if pre is not None: 1612 pre(self, position, arg) 1613 for c in xrange(len(self.__terms)): 1614 self.__terms[c]._walkTermTree(position + (c,), pre, post, arg) 1615 if post is not None: 1616 post(self, position, arg)
1617
1618 -class LeafNode (Node):
1619 """Intermediary for nodes that have no child nodes."""
1620 - def _first (self):
1621 return [()]
1622 - def _last (self):
1623 return [()]
1624 - def _nullable (self):
1625 return False
1626 - def _follow (self):
1627 return { (): frozenset() }
1628
1629 - def _walkTermTree (self, position, pre, post, arg):
1630 if pre is not None: 1631 pre(self, position, arg) 1632 if post is not None: 1633 post(self, position, arg)
1634
1635 -class NumericalConstraint (Node):
1636 """A term with a numeric range constraint. 1637 1638 This corresponds to a "particle" in the XML Schema content model.""" 1639 1640 _Precedence = -1 1641 1642 __min = None
1643 - def __get_min (self):
1644 return self.__min
1645 min = property(__get_min) 1646 1647 __max = None
1648 - def __get_max (self):
1649 return self.__max
1650 max = property(__get_max) 1651 1652 __term = None
1653 - def __get_term (self):
1654 return self.__term
1655 term = property(__get_term) 1656
1657 - def __init__ (self, term, min=0, max=1, **kw):
1658 """Term with a numerical constraint. 1659 1660 @param term: A term, the number of appearances of which is 1661 constrained in this term. 1662 @type term: L{Node} 1663 1664 @keyword min: The minimum number of occurrences of C{term}. 1665 The value must be non-negative. 1666 1667 @keyword max: The maximum number of occurrences of C{term}. 1668 The value must be positive (in which case it must also be no 1669 smaller than C{min}), or C{None} to indicate an unbounded 1670 number of occurrences.""" 1671 super(NumericalConstraint, self).__init__(**kw) 1672 self.__term = term 1673 self.__min = min 1674 self.__max = max
1675
1676 - def clone (self):
1677 return super(NumericalConstraint, self).clone(self.__term, self.__min, self.__max)
1678
1679 - def _first (self):
1680 return [ (0,) + _fc for _fc in self.__term.first ]
1681
1682 - def _last (self):
1683 return [ (0,) + _lc for _lc in self.__term.last ]
1684
1685 - def _nullable (self):
1686 return (0 == self.__min) or self.__term.nullable
1687
1688 - def _follow (self):
1689 rv = {} 1690 pp = (0,) 1691 last_r1 = set(self.__term.last) 1692 for (q, transition_set) in self.__term.follow.iteritems(): 1693 rv[pp+q] = self._PosConcatTransitionSet(pp, transition_set) 1694 if q in last_r1: 1695 last_r1.remove(q) 1696 for sq1 in self.__term.first: 1697 q1 = pp+sq1 1698 psi = {} 1699 for p1 in self.__term.counterSubPositions(q): 1700 psi[pp+p1] = self.RESET 1701 if (1 != self.min) or (self.max is not None): 1702 psi[()] = self.INCREMENT 1703 rv[pp+q].append((q1, psi)) 1704 assert not last_r1 1705 return rv
1706
1707 - def _walkTermTree (self, position, pre, post, arg):
1708 if pre is not None: 1709 pre(self, position, arg) 1710 self.__term._walkTermTree(position + (0,), pre, post, arg) 1711 if post is not None: 1712 post(self, position, arg)
1713
1714 - def __str__ (self):
1715 rv = str(self.__term) 1716 if self.__term._Precedence < self._Precedence: 1717 rv = '(' + rv + ')' 1718 rv += '^(%u,' % (self.__min,) 1719 if self.__max is not None: 1720 rv += '%u' % (self.__max) 1721 return rv + ')'
1722
1723 -class Choice (MultiTermNode):
1724 """A term that may be any one of a set of terms. 1725 1726 This term matches if any one of its contained terms matches.""" 1727 1728 _Precedence = -3 1729
1730 - def __init__ (self, *terms, **kw):
1731 """Term that selects one of a set of terms. 1732 1733 The terms are provided as arguments. All must be instances of 1734 a subclass of L{Node}.""" 1735 super(Choice, self).__init__(*terms, **kw)
1736
1737 - def _first (self):
1738 rv = set() 1739 for c in xrange(len(self.terms)): 1740 rv.update([ (c,) + _fc for _fc in self.terms[c].first]) 1741 return rv
1742
1743 - def _last (self):
1744 rv = set() 1745 for c in xrange(len(self.terms)): 1746 rv.update([ (c,) + _lc for _lc in self.terms[c].last]) 1747 return rv
1748
1749 - def _nullable (self):
1750 for t in self.terms: 1751 if t.nullable: 1752 return True 1753 return False
1754
1755 - def _follow (self):
1756 rv = {} 1757 for c in xrange(len(self.terms)): 1758 for (q, transition_set) in self.terms[c].follow.iteritems(): 1759 pp = (c,) 1760 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1761 return rv
1762
1763 - def __str__ (self):
1764 elts = [] 1765 for t in self.terms: 1766 if t._Precedence < self._Precedence: 1767 elts.append('(' + str(t) + ')') 1768 else: 1769 elts.append(str(t)) 1770 return '+'.join(elts)
1771
1772 -class Sequence (MultiTermNode):
1773 """A term that is an ordered sequence of terms.""" 1774 1775 _Precedence = -2 1776
1777 - def __init__ (self, *terms, **kw):
1778 """Term that collects an ordered sequence of terms. 1779 1780 The terms are provided as arguments. All must be instances of 1781 a subclass of L{Node}.""" 1782 super(Sequence, self).__init__(*terms, **kw)
1783
1784 - def _first (self):
1785 rv = set() 1786 c = 0 1787 while c < len(self.terms): 1788 t = self.terms[c] 1789 rv.update([ (c,) + _fc for _fc in t.first]) 1790 if not t.nullable: 1791 break 1792 c += 1 1793 return rv
1794
1795 - def _last (self):
1796 rv = set() 1797 c = len(self.terms) - 1 1798 while 0 <= c: 1799 t = self.terms[c] 1800 rv.update([ (c,) + _lc for _lc in t.last]) 1801 if not t.nullable: 1802 break 1803 c -= 1 1804 return rv
1805
1806 - def _nullable (self):
1807 for t in self.terms: 1808 if not t.nullable: 1809 return False 1810 return True
1811
1812 - def _follow (self):
1813 rv = {} 1814 for c in xrange(len(self.terms)): 1815 pp = (c,) 1816 for (q, transition_set) in self.terms[c].follow.iteritems(): 1817 rv[pp + q] = self._PosConcatTransitionSet(pp, transition_set) 1818 for c in xrange(len(self.terms)-1): 1819 t = self.terms[c] 1820 pp = (c,) 1821 # Link from the last of one term to the first of the next term. 1822 # Repeat while the destination term is nullable and there are 1823 # successor terms. 1824 for q in t.last: 1825 psi = {} 1826 for p1 in t.counterSubPositions(q): 1827 psi[pp + p1] = self.RESET 1828 nc = c 1829 while nc+1 < len(self.terms): 1830 nc += 1 1831 nt = self.terms[nc] 1832 for sq1 in nt.first: 1833 q1 = (nc,) + sq1 1834 rv[pp+q].append((q1, psi)) 1835 if not nt.nullable: 1836 break 1837 return rv
1838
1839 - def __str__ (self):
1840 elts = [] 1841 for t in self.terms: 1842 if t._Precedence < self._Precedence: 1843 elts.append('(' + str(t) + ')') 1844 else: 1845 elts.append(str(t)) 1846 return '.'.join(elts)
1847
1848 -class All (MultiTermNode, LeafNode):
1849 """A term that is an unordered sequence of terms. 1850 1851 Note that the inheritance structure for this node is unusual. It 1852 has multiple children when it is treated as a term tree, but is 1853 considered a leaf node when constructing an automaton. 1854 """ 1855 1856 _Precedence = 0 1857
1858 - def __init__ (self, *terms, **kw):
1859 """Term that collects an unordered sequence of terms. 1860 1861 The terms are provided as arguments. All must be instances of 1862 a subclass of L{Node}.""" 1863 super(All, self).__init__(*terms, **kw)
1864
1865 - def _nullable (self):
1866 for t in self.terms: 1867 if not t.nullable: 1868 return False 1869 return True
1870 1871 @classmethod
1872 - def CreateTermTree (cls, *terms):
1873 """Create a term tree that implements unordered catenation of 1874 the terms. 1875 1876 This expansion results in a standard choice/sequence term 1877 tree, at the cost of quadratic state expansion because terms 1878 are L{cloned<Node.clone>} as required to satisfy the tree 1879 requirements of the term tree. 1880 1881 @param terms: The tuple of terms that are elements of an 1882 accepted sequence. 1883 1884 @return: A term tree comprising a choice between sequences 1885 that connect each term to the unordered catenation of the 1886 remaining terms.""" 1887 if 1 == len(terms): 1888 return terms[0] 1889 disjuncts = [] 1890 for i in xrange(len(terms)): 1891 n = terms[i] 1892 rem = map(lambda _s: _s.clone(), terms[:i] + terms[i+1:]) 1893 disjuncts.append(Sequence(n, cls.CreateTermTree(*rem))) 1894 return Choice(*disjuncts)
1895
1896 - def __str__ (self):
1897 return u'&(' + ','.join([str(_t) for _t in self.terms]) + ')'
1898
1899 -class Symbol (LeafNode):
1900 """A leaf term that is a symbol. 1901 1902 The symbol is represented by the L{metadata} field.""" 1903 1904 _Precedence = 0 1905
1906 - def __init__ (self, symbol, **kw):
1907 kw['metadata'] = symbol 1908 super(Symbol, self).__init__(**kw)
1909
1910 - def clone (self):
1911 return super(Symbol, self).clone(self.metadata)
1912
1913 - def __str__ (self):
1914 return str(self.metadata)
1915