pyxb.binding.nfa

1 # Copyright 2009, Peter A. Bigot 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); you may 4 # not use this file except in compliance with the License. You may obtain a 5 # copy of the License at: 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 # License for the specific language governing permissions and limitations 13 # under the License. 14 15 """Finite automata classes that support content model management.""" 16 17 from pyxb.xmlschema.structures import Particle, ModelGroup 18 19 # Represent state transitions as a map from states to maps from 20 # symbols to sets of states. States are integers. 21

22 -class FiniteAutomaton (dict):

23 """Represent a finite automaton. 24 25 The FiniteAutomaton instance is a map from states to sets of transitions. 26 27 A transition is a map from a key to a set of states. 28 29 States are integers. The start and end state are distinguished. 30 Transitions are by value, and are one of ElementDeclaration, 31 ModelGroup[all], and Wildcard. The value None represents an 32 epsilon transition. 33 34 """ 35 36 # A unique identifier used for creating states 37 __stateID = -1 38 39 # The state serving as the automaton start. 40 __start = None 41 42 # The state serving as the automaton end. 43 __end = None 44

45 - def __init__ (self):

46 self.__end = self.newState() 47 self.__start = self.newState()

48

49 - def newState (self):

50 """Create a new node in the automaton. No transitions are added.""" 51 self.__stateID += 1 52 self.setdefault(self.__stateID, {}) 53 return self.__stateID

54

55 - def start (self):

56 """Obtain the start node of the automaton.""" 57 return self.__start

58

59 - def end (self):

60 """Obtain the end node of the automaton.""" 61 return self.__end

62

63 - def addTransition (self, key, source, destination):

64 """Add a transition on key from the source state to the destination state.""" 65 assert destination is not None 66 self.setdefault(source, {}).setdefault(key, set()).add(destination) 67 return self

68

69 - def ok (self, key, source, destination):

70 """Return True iff the automaton can transition from source to 71 destination on key.""" 72 return destination in self[source].get(key, set())

73

74 - def addSubAutomaton (self, nfa):

75 """Copy the given automaton into this one. Returns a pair of 76 the start and end states of the copied sub-automaton.""" 77 nfa_base_id = self.__stateID+1 78 self.__stateID += len(nfa) 79 for sub_state in nfa.keys(): 80 ssid = sub_state + nfa_base_id 81 ss_map = self.setdefault(ssid, {}) 82 for key in nfa[sub_state]: 83 ss_map.setdefault(key, set()) 84 ss_map[key] = ss_map[key].union(set([ (nfa_base_id+_i) for _i in nfa[sub_state][key]])) 85 return (nfa_base_id+nfa.start(), nfa_base_id+nfa.end())

86

87 - def alphabet (self):

88 """Determine the keys that allow transitions in the automaton.""" 89 elements = set() 90 for s in self.keys(): 91 transitions = self[s] 92 elements = elements.union(transitions.keys()) 93 elements.discard(None) 94 return elements

95

96 - def isFullPath (self, steps):

97 """Return True iff the automaton can be traversed from start 98 to end following the given steps, including arbitrary epsilon 99 moves.""" 100 reaches = self.epsilonClosure([self.start()]) 101 #print 'Starting full path from %s\n%s\n' % (reaches, self) 102 for s in steps: 103 reaches = self.epsilonClosure(self.move(reaches, s)) 104 return self.end() in reaches

105

106 - def move (self, states, key):

107 """Determine the set of states reachable from the input set of states by one key transition.""" 108 next_states = set() 109 for s in states: 110 next_states = next_states.union(self[s].get(key, set())) 111 #print 'Move from %s via %s is %s' % (states, key, next_states) 112 return next_states

113

114 - def epsilonClosure (self, states):

115 """Calculate the epsilon closure of the given set of states.""" 116 states = set(states) 117 while True: 118 new_states = states.union(self.move(states, None)) 119 if states == new_states: 120 return states 121 states = new_states

122

123 - def reverseTransitions (self):

124 reverse_map = { } 125 for state in self.keys(): 126 transitions = self[state] 127 for key in transitions.keys(): 128 [ reverse_map.setdefault(_s, {}).setdefault(key, set()).add(state) for _s in transitions[key] ] 129 #print reverse_map 130 return reverse_map

131

132 - def minimizeDFA (self, final_states):

133 nonfinal_states = tuple(set(self.keys()).difference(set(final_states))) 134 alphabet = self.alphabet() 135 reverse_map = self.reverseTransitions() 136 work = set([ final_states, nonfinal_states ]) 137 partition = work.copy() 138 while 0 < len(work): 139 states = set(work.pop()) 140 #print 'State %s, partition %s' % (states, partition) 141 for key in alphabet: 142 sources = set() 143 [ sources.update(reverse_map.get(_s, {}).get(key, set())) for _s in states ] 144 new_partition = set() 145 for r in partition: 146 rs = set(r) 147 if (0 < len(sources.intersection(rs))) and (0 < len(rs.difference(sources))): 148 r1 = tuple(rs.intersection(sources)) 149 r2 = tuple(rs.difference(r1)) 150 #print 'Split on %s: %s and %s' % (key, r1, r2) 151 new_partition.add(r1) 152 new_partition.add(r2) 153 if r in work: 154 work.remove(r) 155 work.add(r1) 156 work.add(r2) 157 elif len(r1) <= len(r2): 158 work.add(r1) 159 else: 160 work.add(r2) 161 else: 162 new_partition.add(r) 163 partition = new_partition 164 translate_map = { } 165 min_dfa = FiniteAutomaton() 166 for p in partition: 167 if self.start() in p: 168 new_state = min_dfa.start() 169 elif self.end() in p: 170 new_state = min_dfa.end() 171 else: 172 new_state = min_dfa.newState() 173 #print 'Convert %s to %s' % (p, new_state) 174 for max_state in p: 175 assert max_state not in translate_map 176 translate_map[max_state] = new_state 177 178 for f in final_states: 179 self.addTransition(None, f, self.end()) 180 #print 'DFA: %s' % (self,) 181 for (state, transitions) in self.items(): 182 for (key, destination) in transitions.items(): 183 assert 1 == len(destination) 184 d = destination.copy().pop() 185 #print 'Old: %s via %s to %s\nNew: %s via %s to %s' % (state, key, d, translate_map[state], key, translate_map[d]) 186 min_dfa.addTransition(key, translate_map[state], translate_map[d]) 187 188 # Just in case self.start() and self.end() are in the same partition 189 min_dfa.addTransition(None, translate_map[self.end()], min_dfa.end()) 190 #print 'Final added %d jump to %d' % (translate_map[self.end()], min_dfa.end()) 191 192 #print 'DFA: %s' % (self,) 193 #print 'Start: %s' % (translate_map[self.start()],) 194 #print 'Final: %s' % (set([ translate_map[_s] for _s in final_states ]).pop(),) 195 #print 'Partition: %s' % (partition,) 196 #print 'Minimized: %s' % (min_dfa,) 197 #print "Resulting DFA:\n%s\n\n" % (min_dfa,) 198 #print 'Minimal DFA has %d states, original had %d' % (len(min_dfa), len(self)) 199 return min_dfa

200

201 - def buildDFA (self):

202 """Build a deterministic finite automaton that accepts the 203 same language as this one. 204 205 The resulting automaton has epsilon transitions only from 206 terminal states to the DFA distinguished end state.""" 207 208 #print "Building DFA from NFA:\n%s\n" % (self,) 209 dfa = FiniteAutomaton() 210 ps0 = tuple(self.epsilonClosure([ dfa.start() ])) 211 #print "Start state is %s" % (ps0,) 212 pset_to_state = { ps0 : dfa.start() } 213 changing = True 214 alphabet = self.alphabet() 215 while changing: 216 changing = False 217 for (psi, dfa_state) in pset_to_state.items(): 218 for key in alphabet: 219 assert key is not None 220 ns = tuple(self.epsilonClosure(self.move(psi, key))) 221 if 0 == len(ns): 222 #print 'From %s via %s is dead' % (psi, key) 223 continue 224 #print 'From %s via %s can reach %s' % (psi, key, ns) 225 new_state = pset_to_state.get(ns, None) 226 if new_state is None: 227 new_state = dfa.newState() 228 pset_to_state[ns] = new_state 229 #print "New state %d is %s" % (new_state, ns) 230 if not dfa.ok(key, dfa_state, new_state): 231 changing = True 232 dfa.addTransition(key, dfa_state, new_state) 233 final_states = set() 234 for (psi, dfa_state) in pset_to_state.items(): 235 if self.end() in psi: 236 final_states.add(dfa_state) 237 return dfa.minimizeDFA(tuple(final_states))

238

239 - def __str__ (self):

240 states = set(self.keys()) 241 elements = set() 242 for k in self.keys(): 243 transitions = self[k] 244 elements = elements.union(transitions.keys()) 245 for step in transitions.keys(): 246 states = states.union(transitions[step]) 247 states = list(states) 248 states.sort() 249 strings = [] 250 for source in states: 251 if source == self.end(): 252 strings.append('%s terminates' % (source,)) 253 continue 254 transitions = self[source] 255 if 0 == len(transitions): 256 strings.append('%s dead-ends' % (source,)) 257 continue 258 for step in transitions.keys(): 259 strings.append('%s via %s to %s' % (source, step, ' '.join([ str(_s) for _s in transitions[step]]))) 260 return "\n".join(strings)

261

262 -def _Permutations (particles):

263 if 1 == len(particles): 264 yield tuple(particles) 265 else: 266 for i in range(len(particles)): 267 this = particles[i] 268 rest = particles[:i] + particles[i+1:] 269 for p in _Permutations(rest): 270 yield (this,) + p

271

272 -class AllWalker (object):

273 """A list of minimized DFAs each of which is a single option within 274 an ALL model group.""" 275 __particles = None 276

277 - def __init__ (self):

278 self.__particles = [ ]

279

280 - def particles (self): return self.__particles

281

282 - def add (self, dfa, is_required):

283 self.__particles.append( ( dfa, is_required ) )

284

285 -class Thompson:

286 """Create a NFA from a content model. Reminiscent of Thompson's 287 algorithm for creating an NFA from a regular expression. See 288 U{http://portal.acm.org/citation.cfm?doid=363387}""" 289 290 # The NFA 291 __nfa = None 292

293 - def nfa (self):

294 return self.__nfa

295

296 - def __init__ (self, term=None):

297 self.__nfa = FiniteAutomaton() 298 if term is not None: 299 #assert isinstance(term, Particle) 300 self.addTransition(term, self.__nfa.start(), self.__nfa.end())

301

302 - def addTransition (self, term, start, end):

303 """Interpret the term and update the NFA to support a path 304 from start to end that is consistent with the term. 305 306 Particles express looping operations with minimum and maximum 307 iteration counts. 308 309 Model groups express control structures: ordered and unordered 310 sequences, and alternatives. 311 312 Anything else is assumed to be a character in the automaton 313 alphabet. 314 """ 315 if isinstance(term, Particle): 316 return self.__fromParticle(term, start, end) 317 if isinstance(term, ModelGroup): 318 return self.__fromModelGroup(term, start, end) 319 self.__nfa.addTransition(term, start, end)

320

321 - def __fromParticle (self, particle, start, end):

322 """Add transitions to interpret the particle.""" 323 324 #print '# %d to %s of %s' % (particle.minOccurs(), particle.maxOccurs(), particle.term()) 325 326 # If possible, epsilon transition straight from start to end. 327 if 0 == particle.minOccurs(): 328 self.addTransition(None, start, end) 329 330 # Add term transitions from start through the minimum number 331 # of instances of the term. 332 cur_start = next_end = start 333 for step in range(0, particle.minOccurs()): 334 cur_start = next_end 335 next_end = self.__nfa.newState() 336 self.addTransition(particle.term(), cur_start, next_end) 337 338 if None is particle.maxOccurs(): 339 # Add a back loop to repeat the last instance of the term 340 # (creating said instance, if we haven't already) 341 if next_end == start: 342 self.addTransition(particle.term(), start, end) 343 next_end = end 344 self.addTransition(None, next_end, cur_start) 345 else: 346 # Add additional terms up to the maximum, with a short-cut 347 # exit for those above the minOccurs value. 348 for step in range(particle.minOccurs(), particle.maxOccurs()): 349 cur_start = next_end 350 next_end = self.__nfa.newState() 351 self.addTransition(None, cur_start, end) 352 self.addTransition(particle.term(), cur_start, next_end) 353 # Leave the sub-FA 354 self.addTransition(None, next_end, end)

355

356 - def __fromMGSequence (self, particles, start, end):

357 # Just step from one to the next 358 #print '# Sequence of %s' % (particles,) 359 for p in particles: 360 next_state = self.__nfa.newState() 361 self.addTransition(p, start, next_state) 362 start = next_state 363 self.addTransition(None, start, end)

364

365 - def __fromMGChoice (self, particles, start, end):

366 # Start to end for each choice, but make the choice stage 367 # occur once (in case a particle adds a transition from end to 368 # start. 369 for p in particles: 370 choice_start = self.__nfa.newState() 371 choice_end = self.__nfa.newState() 372 self.addTransition(None, start, choice_start) 373 self.addTransition(p, choice_start, choice_end) 374 self.addTransition(None, choice_end, end)

375

376 - def __fromMGAll (self, particles, start, end):

377 # All is too ugly: exponential state growth. Use a special 378 # construct instead. 379 walker = AllWalker() 380 for p in particles: 381 walker.add(Thompson(p).nfa().buildDFA(), 0 < p.minOccurs()) 382 self.addTransition(walker, start, end)

383

384 - def __fromModelGroup (self, group, start, end):

385 # Do the right thing based on the model group compositor 386 if ModelGroup.C_ALL == group.compositor(): 387 return self.__fromMGAll(group.particles(), start, end) 388 if ModelGroup.C_CHOICE == group.compositor(): 389 return self.__fromMGChoice(group.particles(), start, end) 390 if ModelGroup.C_SEQUENCE == group.compositor(): 391 return self.__fromMGSequence(group.particles(), start, end) 392 assert False

393

Source Code for Module pyxb.binding.nfa