1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 """Finite automata classes that support content model management."""
16
17 from pyxb.xmlschema.structures import Particle, ModelGroup
18
19
20
21
23 """Represent a finite automaton.
24
25 The FiniteAutomaton instance is a map from states to sets of transitions.
26
27 A transition is a map from a key to a set of states.
28
29 States are integers. The start and end state are distinguished.
30 Transitions are by value, and are one of ElementDeclaration,
31 ModelGroup[all], and Wildcard. The value None represents an
32 epsilon transition.
33
34 """
35
36
37 __stateID = -1
38
39
40 __start = None
41
42
43 __end = None
44
48
50 """Create a new node in the automaton. No transitions are added."""
51 self.__stateID += 1
52 self.setdefault(self.__stateID, {})
53 return self.__stateID
54
56 """Obtain the start node of the automaton."""
57 return self.__start
58
60 """Obtain the end node of the automaton."""
61 return self.__end
62
64 """Add a transition on key from the source state to the destination state."""
65 assert destination is not None
66 self.setdefault(source, {}).setdefault(key, set()).add(destination)
67 return self
68
69 - def ok (self, key, source, destination):
70 """Return True iff the automaton can transition from source to
71 destination on key."""
72 return destination in self[source].get(key, set())
73
75 """Copy the given automaton into this one. Returns a pair of
76 the start and end states of the copied sub-automaton."""
77 nfa_base_id = self.__stateID+1
78 self.__stateID += len(nfa)
79 for sub_state in nfa.keys():
80 ssid = sub_state + nfa_base_id
81 ss_map = self.setdefault(ssid, {})
82 for key in nfa[sub_state]:
83 ss_map.setdefault(key, set())
84 ss_map[key] = ss_map[key].union(set([ (nfa_base_id+_i) for _i in nfa[sub_state][key]]))
85 return (nfa_base_id+nfa.start(), nfa_base_id+nfa.end())
86
88 """Determine the keys that allow transitions in the automaton."""
89 elements = set()
90 for s in self.keys():
91 transitions = self[s]
92 elements = elements.union(transitions.keys())
93 elements.discard(None)
94 return elements
95
97 """Return True iff the automaton can be traversed from start
98 to end following the given steps, including arbitrary epsilon
99 moves."""
100 reaches = self.epsilonClosure([self.start()])
101
102 for s in steps:
103 reaches = self.epsilonClosure(self.move(reaches, s))
104 return self.end() in reaches
105
106 - def move (self, states, key):
107 """Determine the set of states reachable from the input set of states by one key transition."""
108 next_states = set()
109 for s in states:
110 next_states = next_states.union(self[s].get(key, set()))
111
112 return next_states
113
115 """Calculate the epsilon closure of the given set of states."""
116 states = set(states)
117 while True:
118 new_states = states.union(self.move(states, None))
119 if states == new_states:
120 return states
121 states = new_states
122
131
133 nonfinal_states = tuple(set(self.keys()).difference(set(final_states)))
134 alphabet = self.alphabet()
135 reverse_map = self.reverseTransitions()
136 work = set([ final_states, nonfinal_states ])
137 partition = work.copy()
138 while 0 < len(work):
139 states = set(work.pop())
140
141 for key in alphabet:
142 sources = set()
143 [ sources.update(reverse_map.get(_s, {}).get(key, set())) for _s in states ]
144 new_partition = set()
145 for r in partition:
146 rs = set(r)
147 if (0 < len(sources.intersection(rs))) and (0 < len(rs.difference(sources))):
148 r1 = tuple(rs.intersection(sources))
149 r2 = tuple(rs.difference(r1))
150
151 new_partition.add(r1)
152 new_partition.add(r2)
153 if r in work:
154 work.remove(r)
155 work.add(r1)
156 work.add(r2)
157 elif len(r1) <= len(r2):
158 work.add(r1)
159 else:
160 work.add(r2)
161 else:
162 new_partition.add(r)
163 partition = new_partition
164 translate_map = { }
165 min_dfa = FiniteAutomaton()
166 for p in partition:
167 if self.start() in p:
168 new_state = min_dfa.start()
169 elif self.end() in p:
170 new_state = min_dfa.end()
171 else:
172 new_state = min_dfa.newState()
173
174 for max_state in p:
175 assert max_state not in translate_map
176 translate_map[max_state] = new_state
177
178 for f in final_states:
179 self.addTransition(None, f, self.end())
180
181 for (state, transitions) in self.items():
182 for (key, destination) in transitions.items():
183 assert 1 == len(destination)
184 d = destination.copy().pop()
185
186 min_dfa.addTransition(key, translate_map[state], translate_map[d])
187
188
189 min_dfa.addTransition(None, translate_map[self.end()], min_dfa.end())
190
191
192
193
194
195
196
197
198
199 return min_dfa
200
202 """Build a deterministic finite automaton that accepts the
203 same language as this one.
204
205 The resulting automaton has epsilon transitions only from
206 terminal states to the DFA distinguished end state."""
207
208
209 dfa = FiniteAutomaton()
210 ps0 = tuple(self.epsilonClosure([ dfa.start() ]))
211
212 pset_to_state = { ps0 : dfa.start() }
213 changing = True
214 alphabet = self.alphabet()
215 while changing:
216 changing = False
217 for (psi, dfa_state) in pset_to_state.items():
218 for key in alphabet:
219 assert key is not None
220 ns = tuple(self.epsilonClosure(self.move(psi, key)))
221 if 0 == len(ns):
222
223 continue
224
225 new_state = pset_to_state.get(ns, None)
226 if new_state is None:
227 new_state = dfa.newState()
228 pset_to_state[ns] = new_state
229
230 if not dfa.ok(key, dfa_state, new_state):
231 changing = True
232 dfa.addTransition(key, dfa_state, new_state)
233 final_states = set()
234 for (psi, dfa_state) in pset_to_state.items():
235 if self.end() in psi:
236 final_states.add(dfa_state)
237 return dfa.minimizeDFA(tuple(final_states))
238
240 states = set(self.keys())
241 elements = set()
242 for k in self.keys():
243 transitions = self[k]
244 elements = elements.union(transitions.keys())
245 for step in transitions.keys():
246 states = states.union(transitions[step])
247 states = list(states)
248 states.sort()
249 strings = []
250 for source in states:
251 if source == self.end():
252 strings.append('%s terminates' % (source,))
253 continue
254 transitions = self[source]
255 if 0 == len(transitions):
256 strings.append('%s dead-ends' % (source,))
257 continue
258 for step in transitions.keys():
259 strings.append('%s via %s to %s' % (source, step, ' '.join([ str(_s) for _s in transitions[step]])))
260 return "\n".join(strings)
261
271
273 """A list of minimized DFAs each of which is a single option within
274 an ALL model group."""
275 __particles = None
276
279
280 - def particles (self): return self.__particles
281
282 - def add (self, dfa, is_required):
284
286 """Create a NFA from a content model. Reminiscent of Thompson's
287 algorithm for creating an NFA from a regular expression. See
288 U{http://portal.acm.org/citation.cfm?doid=363387}"""
289
290
291 __nfa = None
292
295
301
303 """Interpret the term and update the NFA to support a path
304 from start to end that is consistent with the term.
305
306 Particles express looping operations with minimum and maximum
307 iteration counts.
308
309 Model groups express control structures: ordered and unordered
310 sequences, and alternatives.
311
312 Anything else is assumed to be a character in the automaton
313 alphabet.
314 """
315 if isinstance(term, Particle):
316 return self.__fromParticle(term, start, end)
317 if isinstance(term, ModelGroup):
318 return self.__fromModelGroup(term, start, end)
319 self.__nfa.addTransition(term, start, end)
320
321 - def __fromParticle (self, particle, start, end):
322 """Add transitions to interpret the particle."""
323
324
325
326
327 if 0 == particle.minOccurs():
328 self.addTransition(None, start, end)
329
330
331
332 cur_start = next_end = start
333 for step in range(0, particle.minOccurs()):
334 cur_start = next_end
335 next_end = self.__nfa.newState()
336 self.addTransition(particle.term(), cur_start, next_end)
337
338 if None is particle.maxOccurs():
339
340
341 if next_end == start:
342 self.addTransition(particle.term(), start, end)
343 next_end = end
344 self.addTransition(None, next_end, cur_start)
345 else:
346
347
348 for step in range(particle.minOccurs(), particle.maxOccurs()):
349 cur_start = next_end
350 next_end = self.__nfa.newState()
351 self.addTransition(None, cur_start, end)
352 self.addTransition(particle.term(), cur_start, next_end)
353
354 self.addTransition(None, next_end, end)
355
364
375
383
393