1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
"""This module contains support for Unicode characters as required to
support the regular expression syntax defined in U{annex F
<http://www.w3.org/TR/xmlschema-2/#regexs>}
of the XML Schema definition.

In particular, we need to be able to identify character properties and
block escapes, as defined in F.1.1, by name.

 - Block data: U{http://www.unicode.org/Public/3.1-Update/Blocks-4.txt}
 - Property list data: U{http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.txt}
 - Full dataset: U{http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt}

The Unicode database active at the time XML Schema 1.0 was defined is
archived at
U{http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html},
and refers to U{Unicode Standard Annex #27: Unicode 3.1
<http://www.unicode.org/unicode/reports/tr27/>}.
"""
34
35 import re
36 import logging
37 import pyxb.utils.utility
38 from pyxb.utils import six
39 from pyxb.utils.six.moves import xrange
40
41 _log = logging.getLogger(__name__)
42
# Feature probe: can this interpreter's regular expression engine compile
# a character range that lies above U+FFFF?  The flag stays False unless
# the probe pattern compiles.
SupportsWideUnicode = False
try:
    re.compile(six.u('[\U0001d7ce-\U0001d7ff]'))
except Exception:
    # Deliberately best-effort: any failure to build the pattern simply
    # means wide characters are unsupported.  Exception (rather than a
    # bare except) keeps KeyboardInterrupt and SystemExit from being
    # swallowed during import.
    pass
else:
    SupportsWideUnicode = True
49
50 import bisect
53 """Raised when some abuse of a L{CodePointSet} is detected."""
54 pass
55
56 @pyxb.utils.utility.BackfillComparisons
58 """Represent a set of Unicode code points.
59
60 Each code point is an integral value between 0 and 0x10FFFF. This
61 class is used to represent a set of code points in a manner
62 suitable for use as regular expression character sets."""
63
MaxShortCodePoint = 0xFFFF

MaxCodePoint = 0x10FFFF if SupportsWideUnicode else MaxShortCodePoint
"""The largest code point this class will represent.  It is the full
Unicode limit of 0x10FFFF when L{SupportsWideUnicode} is true, and is
otherwise capped at L{MaxShortCodePoint} (0xFFFF)."""
73
74
75
76
77
78
79
80
81 __codepoints = None
82
"""For testing purposes only: access to the internal codepoints
representation.

@return: the object held in the private C{__codepoints} attribute
itself, not a copy of it."""
return self.__codepoints
87
90
94
97
108
110
111
# Normalize value to a half-open range [s, e): s is the first code
# point affected, e is one past the last.
if isinstance(value, tuple):
    # A tuple carries an inclusive end; make it exclusive.
    (s, e) = value
    e += 1
elif isinstance(value, six.string_types):
    # A string must be a single character; use its ordinal.
    if 1 < len(value):
        raise TypeError()
    s = ord(value)
    e = s+1
else:
    # Anything else is coerced to an integral code point.
    s = int(value)
    e = s+1
if s >= e:
    raise ValueError('codepoint range value order')

# A range that starts beyond MaxCodePoint changes nothing; one that
# merely runs past it is truncated.
if s > self.MaxCodePoint:
    return self
if e > self.MaxCodePoint:
    e = self.MaxCodePoint+1

# The boundary list is sorted and alternates range start (even index)
# with exclusive range end (odd index).  li counts the boundaries
# strictly below s; it is odd when a range begins before s and has
# not ended before s (s is covered, or abuts that range's end).
li = bisect.bisect_left(self.__codepoints, s)
# ri counts the boundaries at or below e; it is odd when e lands at
# or after a range start and before that range's end.
ri = bisect.bisect_right(self.__codepoints, e)
# Two parity bits select one of four cases.  The descriptions below
# are for adding; when removing, 3 - case swaps 0 with 3 and 1 with
# 2, because removal treats covered and uncovered the other way round.
case = ((li & 1) << 1) | (ri & 1)
if not do_add:
    case = 3 - case
if 0x03 == case:
    # Both ends already covered: drop every boundary in between so
    # the touched ranges merge into one.
    del self.__codepoints[li:ri]
elif 0x02 == case:
    # Start covered, end not: discard swallowed boundaries and move
    # the end of the range holding s out to e.
    del self.__codepoints[li+1:ri]
    self.__codepoints[li] = e
elif 0x01 == case:
    # End covered, start not: discard swallowed boundaries and pull
    # the start of the range holding e back to s.
    del self.__codepoints[li+1:ri]
    self.__codepoints[li] = s
else:
    # Neither end covered: replace whatever lay between with the
    # single new pair of boundaries.
    self.__codepoints[li:ri] = [s, e]
return self
156
def add(self, value):
    """Insert a code point, or an inclusive range of code points, into
    this set.

    @param value: Either an integral value naming one code point, or
    a tuple C{(s,e)} giving the first and last (both inclusive) code
    points of a range.
    @return: C{self}"""
    updated = self.__mutate(value, True)
    return updated
165
"""Add multiple values to a code point set.

@param values: Either a L{CodePointSet} instance, or an iterable
whose members are valid parameters to L{add}.

@return: C{self}"""
if isinstance(values, CodePointSet):
    # Another set is merged by re-entering with its ranges expressed
    # as inclusive (s, e) tuples.
    self.extend(values.asTuples())
else:
    # Each member is added individually.
    for v in values:
        self.__mutate(v, True)
return self
179
"""Remove the given value from the code point set.

@param value: An integral value denoting a code point, or a tuple
C{(s,e)} denoting the start and end (inclusive) code points in a
range, or a L{CodePointSet}.

@return: C{self}"""
if isinstance(value, CodePointSet):
    # Remove another set one inclusive range at a time.
    for v in value.asTuples():
        self.subtract(v)
    return self
# A single code point or range goes through the shared mutation
# routine with the add flag cleared.
return self.__mutate(value, False)
193
194
195
196
# Code points that are written with a leading backslash when a set is
# rendered as a Python regular expression character class.
__XMLtoPythonREEscapedCodepoints = (
    # NUL; the rendering code emits it as the text x00 after the
    # backslash rather than as a raw character
    0,
    # caret, which would negate the class if it appeared first
    ord('^'),
    # backslash, the escape character itself
    ord('\\'),
    # opening square bracket
    ord('['),
    # closing square bracket, which would end the class
    ord(']'),
    # hyphen, which would otherwise denote a range
    ord('-')
)
214
215
216
# Render one code point as the text that stands for it inside a
# character class: normally the character itself.
rv = six.unichr(code_point)
if 0 == code_point:
    # NUL is spelled out as text; 0 is in the escaped tuple, so the
    # backslash added below turns this into \x00.
    rv = six.u('x00')
if code_point in self.__XMLtoPythonREEscapedCodepoints:
    # 0x5c is the backslash character.
    rv = six.unichr(0x5c) + rv
return rv
224
"""Return the code point set as Unicode regular expression
character group consisting of a sequence of characters or
character ranges.

This returns a regular expression fragment using Python's
regular expression syntax.  Note that different regular expression
syntaxes are not compatible, often in subtle ways.

@param with_brackets: If C{True} (default), square brackets
are added to enclose the returned character group.

@return: the fragment as a single unicode string."""
rva = []
if with_brackets:
    rva.append(six.u('['))
for (s, e) in self.asTuples():
    if s == e:
        # A one-member range is written as just that character.
        rva.append(self.__unichr(s))
    else:
        # Otherwise write first-last, both ends inclusive.
        rva.extend([self.__unichr(s), '-', self.__unichr(e)])
if with_brackets:
    rva.append(six.u(']'))
return six.u('').join(rva)
247
"""Return the codepoints as tuples denoting the ranges that are in
the set.

Each tuple C{(s, e)} indicates that the code points from C{s}
(inclusive) to C{e} (inclusive) are in the set.

@return: a list of C{(s, e)} tuples in the order the boundaries are
stored."""

rv = []
start = None
for ri in xrange(len(self.__codepoints)):
    if start is not None:
        # This entry closes the open range; it is stored as one past
        # the last member, so step back to make the end inclusive.
        rv.append( (start, self.__codepoints[ri]-1) )
        start = None
    else:
        # This entry opens a range.
        start = self.__codepoints[ri]
# A start with no matching end means the range runs to the top of
# the supported code point space.
if (start is not None) and (start <= self.MaxCodePoint):
    rv.append( (start, self.MaxCodePoint) )
return rv
266
276
"""If this set represents a single character, return it as its
unicode string value.  Otherwise return C{None}."""
# A single character is stored as exactly two boundaries that are one
# apart: the code point and the value just past it.
if (2 != len(self.__codepoints)) or (1 < (self.__codepoints[1] - self.__codepoints[0])):
    return None
return six.unichr(self.__codepoints[0])
283
284 from pyxb.utils.unicode_data import PropertyMap
285 from pyxb.utils.unicode_data import BlockMap
288 """Regular expression support for XML Schema Data Types.
289
290 This class holds character classes and regular expressions used to
291 constrain the lexical space of XML Schema datatypes derived from
292 U{string<http://www.w3.org/TR/xmlschema-2/#string>}. They are
293 from U{XML 1.0 (Second
294 Edition)<http://www.w3.org/TR/2000/WD-xml-2e-20000814>} and
295 U{Namespaces in XML
296 <http://www.w3.org/TR/1999/REC-xml-names-19990114/>}.
297
298 Unlike the regular expressions used for pattern constraints in XML
299 Schema, which are derived from the Unicode 3.1 specification,
300 these are derived from the Unicode 2.0 specification.
301
302 The XML Schema definition refers explicitly to the second edition
303 of XML, so we have to use these code point sets and patterns. Be
304 aware that U{subsequent updates to the XML specification
305 <http://www.w3.org/XML/xml-V10-4e-errata#E09>} have changed the
306 corresponding patterns for other uses of XML. One significant
307 change is that the original specification, used here, does not
308 allow wide unicode characters."""
309
310 Char = CodePointSet(
311 0x0009,
312 0x000A,
313 0x000D,
314 ( 0x0020, 0xD7FF ),
315 ( 0xE000, 0xFFFD )
316 )
317 if SupportsWideUnicode:
318 Char.add( ( 1+CodePointSet.MaxShortCodePoint, CodePointSet.MaxCodePoint ) )
319
320 BaseChar = CodePointSet(
321 ( 0x0041, 0x005A ),
322 ( 0x0061, 0x007A ),
323 ( 0x00C0, 0x00D6 ),
324 ( 0x00D8, 0x00F6 ),
325 ( 0x00F8, 0x00FF ),
326 ( 0x0100, 0x0131 ),
327 ( 0x0134, 0x013E ),
328 ( 0x0141, 0x0148 ),
329 ( 0x014A, 0x017E ),
330 ( 0x0180, 0x01C3 ),
331 ( 0x01CD, 0x01F0 ),
332 ( 0x01F4, 0x01F5 ),
333 ( 0x01FA, 0x0217 ),
334 ( 0x0250, 0x02A8 ),
335 ( 0x02BB, 0x02C1 ),
336 0x0386,
337 ( 0x0388, 0x038A ),
338 0x038C,
339 ( 0x038E, 0x03A1 ),
340 ( 0x03A3, 0x03CE ),
341 ( 0x03D0, 0x03D6 ),
342 0x03DA,
343 0x03DC,
344 0x03DE,
345 0x03E0,
346 ( 0x03E2, 0x03F3 ),
347 ( 0x0401, 0x040C ),
348 ( 0x040E, 0x044F ),
349 ( 0x0451, 0x045C ),
350 ( 0x045E, 0x0481 ),
351 ( 0x0490, 0x04C4 ),
352 ( 0x04C7, 0x04C8 ),
353 ( 0x04CB, 0x04CC ),
354 ( 0x04D0, 0x04EB ),
355 ( 0x04EE, 0x04F5 ),
356 ( 0x04F8, 0x04F9 ),
357 ( 0x0531, 0x0556 ),
358 0x0559,
359 ( 0x0561, 0x0586 ),
360 ( 0x05D0, 0x05EA ),
361 ( 0x05F0, 0x05F2 ),
362 ( 0x0621, 0x063A ),
363 ( 0x0641, 0x064A ),
364 ( 0x0671, 0x06B7 ),
365 ( 0x06BA, 0x06BE ),
366 ( 0x06C0, 0x06CE ),
367 ( 0x06D0, 0x06D3 ),
368 0x06D5,
369 ( 0x06E5, 0x06E6 ),
370 ( 0x0905, 0x0939 ),
371 0x093D,
372 ( 0x0958, 0x0961 ),
373 ( 0x0985, 0x098C ),
374 ( 0x098F, 0x0990 ),
375 ( 0x0993, 0x09A8 ),
376 ( 0x09AA, 0x09B0 ),
377 0x09B2,
378 ( 0x09B6, 0x09B9 ),
379 ( 0x09DC, 0x09DD ),
380 ( 0x09DF, 0x09E1 ),
381 ( 0x09F0, 0x09F1 ),
382 ( 0x0A05, 0x0A0A ),
383 ( 0x0A0F, 0x0A10 ),
384 ( 0x0A13, 0x0A28 ),
385 ( 0x0A2A, 0x0A30 ),
386 ( 0x0A32, 0x0A33 ),
387 ( 0x0A35, 0x0A36 ),
388 ( 0x0A38, 0x0A39 ),
389 ( 0x0A59, 0x0A5C ),
390 0x0A5E,
391 ( 0x0A72, 0x0A74 ),
392 ( 0x0A85, 0x0A8B ),
393 0x0A8D,
394 ( 0x0A8F, 0x0A91 ),
395 ( 0x0A93, 0x0AA8 ),
396 ( 0x0AAA, 0x0AB0 ),
397 ( 0x0AB2, 0x0AB3 ),
398 ( 0x0AB5, 0x0AB9 ),
399 0x0ABD,
400 0x0AE0,
401 ( 0x0B05, 0x0B0C ),
402 ( 0x0B0F, 0x0B10 ),
403 ( 0x0B13, 0x0B28 ),
404 ( 0x0B2A, 0x0B30 ),
405 ( 0x0B32, 0x0B33 ),
406 ( 0x0B36, 0x0B39 ),
407 0x0B3D,
408 ( 0x0B5C, 0x0B5D ),
409 ( 0x0B5F, 0x0B61 ),
410 ( 0x0B85, 0x0B8A ),
411 ( 0x0B8E, 0x0B90 ),
412 ( 0x0B92, 0x0B95 ),
413 ( 0x0B99, 0x0B9A ),
414 0x0B9C,
415 ( 0x0B9E, 0x0B9F ),
416 ( 0x0BA3, 0x0BA4 ),
417 ( 0x0BA8, 0x0BAA ),
418 ( 0x0BAE, 0x0BB5 ),
419 ( 0x0BB7, 0x0BB9 ),
420 ( 0x0C05, 0x0C0C ),
421 ( 0x0C0E, 0x0C10 ),
422 ( 0x0C12, 0x0C28 ),
423 ( 0x0C2A, 0x0C33 ),
424 ( 0x0C35, 0x0C39 ),
425 ( 0x0C60, 0x0C61 ),
426 ( 0x0C85, 0x0C8C ),
427 ( 0x0C8E, 0x0C90 ),
428 ( 0x0C92, 0x0CA8 ),
429 ( 0x0CAA, 0x0CB3 ),
430 ( 0x0CB5, 0x0CB9 ),
431 0x0CDE,
432 ( 0x0CE0, 0x0CE1 ),
433 ( 0x0D05, 0x0D0C ),
434 ( 0x0D0E, 0x0D10 ),
435 ( 0x0D12, 0x0D28 ),
436 ( 0x0D2A, 0x0D39 ),
437 ( 0x0D60, 0x0D61 ),
438 ( 0x0E01, 0x0E2E ),
439 0x0E30,
440 ( 0x0E32, 0x0E33 ),
441 ( 0x0E40, 0x0E45 ),
442 ( 0x0E81, 0x0E82 ),
443 0x0E84,
444 ( 0x0E87, 0x0E88 ),
445 0x0E8A,
446 0x0E8D,
447 ( 0x0E94, 0x0E97 ),
448 ( 0x0E99, 0x0E9F ),
449 ( 0x0EA1, 0x0EA3 ),
450 0x0EA5,
451 0x0EA7,
452 ( 0x0EAA, 0x0EAB ),
453 ( 0x0EAD, 0x0EAE ),
454 0x0EB0,
455 ( 0x0EB2, 0x0EB3 ),
456 0x0EBD,
457 ( 0x0EC0, 0x0EC4 ),
458 ( 0x0F40, 0x0F47 ),
459 ( 0x0F49, 0x0F69 ),
460 ( 0x10A0, 0x10C5 ),
461 ( 0x10D0, 0x10F6 ),
462 0x1100,
463 ( 0x1102, 0x1103 ),
464 ( 0x1105, 0x1107 ),
465 0x1109,
466 ( 0x110B, 0x110C ),
467 ( 0x110E, 0x1112 ),
468 0x113C,
469 0x113E,
470 0x1140,
471 0x114C,
472 0x114E,
473 0x1150,
474 ( 0x1154, 0x1155 ),
475 0x1159,
476 ( 0x115F, 0x1161 ),
477 0x1163,
478 0x1165,
479 0x1167,
480 0x1169,
481 ( 0x116D, 0x116E ),
482 ( 0x1172, 0x1173 ),
483 0x1175,
484 0x119E,
485 0x11A8,
486 0x11AB,
487 ( 0x11AE, 0x11AF ),
488 ( 0x11B7, 0x11B8 ),
489 0x11BA,
490 ( 0x11BC, 0x11C2 ),
491 0x11EB,
492 0x11F0,
493 0x11F9,
494 ( 0x1E00, 0x1E9B ),
495 ( 0x1EA0, 0x1EF9 ),
496 ( 0x1F00, 0x1F15 ),
497 ( 0x1F18, 0x1F1D ),
498 ( 0x1F20, 0x1F45 ),
499 ( 0x1F48, 0x1F4D ),
500 ( 0x1F50, 0x1F57 ),
501 0x1F59,
502 0x1F5B,
503 0x1F5D,
504 ( 0x1F5F, 0x1F7D ),
505 ( 0x1F80, 0x1FB4 ),
506 ( 0x1FB6, 0x1FBC ),
507 0x1FBE,
508 ( 0x1FC2, 0x1FC4 ),
509 ( 0x1FC6, 0x1FCC ),
510 ( 0x1FD0, 0x1FD3 ),
511 ( 0x1FD6, 0x1FDB ),
512 ( 0x1FE0, 0x1FEC ),
513 ( 0x1FF2, 0x1FF4 ),
514 ( 0x1FF6, 0x1FFC ),
515 0x2126,
516 ( 0x212A, 0x212B ),
517 0x212E,
518 ( 0x2180, 0x2182 ),
519 ( 0x3041, 0x3094 ),
520 ( 0x30A1, 0x30FA ),
521 ( 0x3105, 0x312C ),
522 ( 0xAC00, 0xD7A3 )
523 )
524
525 Ideographic = CodePointSet(
526 ( 0x4E00, 0x9FA5 ),
527 0x3007,
528 ( 0x3021, 0x3029 )
529 )
530
531 Letter = CodePointSet(BaseChar).extend(Ideographic)
532
533 CombiningChar = CodePointSet(
534 ( 0x0300, 0x0345 ),
535 ( 0x0360, 0x0361 ),
536 ( 0x0483, 0x0486 ),
537 ( 0x0591, 0x05A1 ),
538 ( 0x05A3, 0x05B9 ),
539 ( 0x05BB, 0x05BD ),
540 0x05BF,
541 ( 0x05C1, 0x05C2 ),
542 0x05C4,
543 ( 0x064B, 0x0652 ),
544 0x0670,
545 ( 0x06D6, 0x06DC ),
546 ( 0x06DD, 0x06DF ),
547 ( 0x06E0, 0x06E4 ),
548 ( 0x06E7, 0x06E8 ),
549 ( 0x06EA, 0x06ED ),
550 ( 0x0901, 0x0903 ),
551 0x093C,
552 ( 0x093E, 0x094C ),
553 0x094D,
554 ( 0x0951, 0x0954 ),
555 ( 0x0962, 0x0963 ),
556 ( 0x0981, 0x0983 ),
557 0x09BC,
558 0x09BE,
559 0x09BF,
560 ( 0x09C0, 0x09C4 ),
561 ( 0x09C7, 0x09C8 ),
562 ( 0x09CB, 0x09CD ),
563 0x09D7,
564 ( 0x09E2, 0x09E3 ),
565 0x0A02,
566 0x0A3C,
567 0x0A3E,
568 0x0A3F,
569 ( 0x0A40, 0x0A42 ),
570 ( 0x0A47, 0x0A48 ),
571 ( 0x0A4B, 0x0A4D ),
572 ( 0x0A70, 0x0A71 ),
573 ( 0x0A81, 0x0A83 ),
574 0x0ABC,
575 ( 0x0ABE, 0x0AC5 ),
576 ( 0x0AC7, 0x0AC9 ),
577 ( 0x0ACB, 0x0ACD ),
578 ( 0x0B01, 0x0B03 ),
579 0x0B3C,
580 ( 0x0B3E, 0x0B43 ),
581 ( 0x0B47, 0x0B48 ),
582 ( 0x0B4B, 0x0B4D ),
583 ( 0x0B56, 0x0B57 ),
584 ( 0x0B82, 0x0B83 ),
585 ( 0x0BBE, 0x0BC2 ),
586 ( 0x0BC6, 0x0BC8 ),
587 ( 0x0BCA, 0x0BCD ),
588 0x0BD7,
589 ( 0x0C01, 0x0C03 ),
590 ( 0x0C3E, 0x0C44 ),
591 ( 0x0C46, 0x0C48 ),
592 ( 0x0C4A, 0x0C4D ),
593 ( 0x0C55, 0x0C56 ),
594 ( 0x0C82, 0x0C83 ),
595 ( 0x0CBE, 0x0CC4 ),
596 ( 0x0CC6, 0x0CC8 ),
597 ( 0x0CCA, 0x0CCD ),
598 ( 0x0CD5, 0x0CD6 ),
599 ( 0x0D02, 0x0D03 ),
600 ( 0x0D3E, 0x0D43 ),
601 ( 0x0D46, 0x0D48 ),
602 ( 0x0D4A, 0x0D4D ),
603 0x0D57,
604 0x0E31,
605 ( 0x0E34, 0x0E3A ),
606 ( 0x0E47, 0x0E4E ),
607 0x0EB1,
608 ( 0x0EB4, 0x0EB9 ),
609 ( 0x0EBB, 0x0EBC ),
610 ( 0x0EC8, 0x0ECD ),
611 ( 0x0F18, 0x0F19 ),
612 0x0F35,
613 0x0F37,
614 0x0F39,
615 0x0F3E,
616 0x0F3F,
617 ( 0x0F71, 0x0F84 ),
618 ( 0x0F86, 0x0F8B ),
619 ( 0x0F90, 0x0F95 ),
620 0x0F97,
621 ( 0x0F99, 0x0FAD ),
622 ( 0x0FB1, 0x0FB7 ),
623 0x0FB9,
624 ( 0x20D0, 0x20DC ),
625 0x20E1,
626 ( 0x302A, 0x302F ),
627 0x3099,
628 0x309A
629 )
630
631 Digit = CodePointSet(
632 ( 0x0030, 0x0039 ),
633 ( 0x0660, 0x0669 ),
634 ( 0x06F0, 0x06F9 ),
635 ( 0x0966, 0x096F ),
636 ( 0x09E6, 0x09EF ),
637 ( 0x0A66, 0x0A6F ),
638 ( 0x0AE6, 0x0AEF ),
639 ( 0x0B66, 0x0B6F ),
640 ( 0x0BE7, 0x0BEF ),
641 ( 0x0C66, 0x0C6F ),
642 ( 0x0CE6, 0x0CEF ),
643 ( 0x0D66, 0x0D6F ),
644 ( 0x0E50, 0x0E59 ),
645 ( 0x0ED0, 0x0ED9 ),
646 ( 0x0F20, 0x0F29 )
647 )
648
649 Extender = CodePointSet(
650 0x00B7,
651 0x02D0,
652 0x02D1,
653 0x0387,
654 0x0640,
655 0x0E46,
656 0x0EC6,
657 0x3005,
658 ( 0x3031, 0x3035 ),
659 ( 0x309D, 0x309E ),
660 ( 0x30FC, 0x30FE )
661 )
662
663
# Characters allowed to begin a Name: any Letter, underscore, or colon.
NameStartChar = CodePointSet(Letter).add(ord('_')).add(ord(':'))

# As NameStartChar, but without the colon.
NCNameStartChar = CodePointSet(Letter).add(ord('_'))

# Characters allowed anywhere in a Name.
NameChar = (
    CodePointSet(Letter)
    .extend(Digit)
    .add(ord('.'))
    .add(ord('-'))
    .add(ord('_'))
    .add(ord(':'))
    .extend(CombiningChar)
    .extend(Extender)
)

# As NameChar, but without the colon.
NCNameChar = (
    CodePointSet(Letter)
    .extend(Digit)
    .add(ord('.'))
    .add(ord('-'))
    .add(ord('_'))
    .extend(CombiningChar)
    .extend(Extender)
)
687
# Unanchored pattern fragments (*_pat) and whole-string matchers (*_re)
# for the XML name productions, built from the character sets above.
#
# The compiled forms end with \Z rather than $: in Python, $ also
# matches just before a newline that ends the string, which would let
# a value such as "name\n" through even though no name character set
# contains a newline.
Name_pat = '%s%s*' % (NameStartChar.asPattern(), NameChar.asPattern())
Name_re = re.compile(r'^%s\Z' % (Name_pat,))
NmToken_pat = '%s+' % (NameChar.asPattern(),)
NmToken_re = re.compile(r'^%s\Z' % (NmToken_pat,))
NCName_pat = '%s%s*' % (NCNameStartChar.asPattern(), NCNameChar.asPattern())
NCName_re = re.compile(r'^%s\Z' % (NCName_pat,))
# A QName is an NCName with an optional NCName prefix and colon.
QName_pat = '(%s:)?%s' % (NCName_pat, NCName_pat)
QName_re = re.compile(r'^%s\Z' % (QName_pat,))
696
697
# Single-character escapes: maps the escaped character to the
# one-member set it denotes.
SingleCharEsc = { 'n' : CodePointSet(0x0A),
                  'r' : CodePointSet(0x0D),
                  't' : CodePointSet(0x09) }
# Each of these characters, when escaped, stands for itself.
for c in r'\|.-^?*+{}()[]':
    SingleCharEsc[c] = CodePointSet(ord(c))

# Category escapes, keyed by escape text of the form p{Name} or
# P{Name}; the upper-case form maps to the negated set.
catEsc = { }
# NOTE(review): complEsc is created here but nothing in this section
# fills it; the complemented P{...} entries go into catEsc.  Confirm
# whether any caller reads complEsc.
complEsc = { }
for k, v in six.iteritems(PropertyMap):
    catEsc[six.u('p{%s}') % (k,)] = v
    catEsc[six.u('P{%s}') % (k,)] = v.negate()

# Block escapes, keyed as p{IsName} / P{IsName}; again the upper-case
# form maps to the negated set.
IsBlockEsc = { }
for k, v in six.iteritems(BlockMap):
    IsBlockEsc[six.u('p{Is%s}') % (k,)] = v
    IsBlockEsc[six.u('P{Is%s}') % (k,)] = v.negate()

# Every code point except newline and carriage return.
WildcardEsc = CodePointSet(ord('\n'), ord('\r')).negate()
# Multi-character escapes, keyed by the escape letter.  Each pair holds
# a set and its negation.
MultiCharEsc = { }
# Space, tab, newline and carriage return.
MultiCharEsc['s'] = CodePointSet(0x20, ord('\t'), ord('\n'), ord('\r'))
MultiCharEsc['S'] = MultiCharEsc['s'].negate()
# XML Letter plus underscore and colon.
MultiCharEsc['i'] = CodePointSet(XML1p0e2.Letter).add(ord('_')).add(ord(':'))
MultiCharEsc['I'] = MultiCharEsc['i'].negate()
# A copy of the XML NameChar set.
MultiCharEsc['c'] = CodePointSet(XML1p0e2.NameChar)
MultiCharEsc['C'] = MultiCharEsc['c'].negate()
# The same object as PropertyMap['Nd'], not a copy.
MultiCharEsc['d'] = PropertyMap['Nd']
MultiCharEsc['D'] = MultiCharEsc['d'].negate()
# For this pair the upper-case set is built first, as the union of the
# P, Z and C property sets, and the lower-case one is its negation.
MultiCharEsc['W'] = CodePointSet(PropertyMap['P']).extend(PropertyMap['Z']).extend(PropertyMap['C'])
MultiCharEsc['w'] = MultiCharEsc['W'].negate()
731