1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module contains support for Unicode characters as required to
17 support the regular expression syntax defined in U{annex F
18 <http://www.w3.org/TR/xmlschema-2/#regexs>}
19 of the XML Schema definition.
20
21 In particular, we need to be able to identify character properties and
22 block escapes, as defined in F.1.1, by name.
23
24 - Block data: U{http://www.unicode.org/Public/3.1-Update/Blocks-4.txt}
25 - Property list data: U{http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.txt}
26 - Full dataset: U{http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt}
27
28 The Unicode database active at the time XML Schema 1.0 was defined is
29 archived at
30 U{http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html},
31 and refers to U{Unicode Standard Annex #27: Unicode 3.1
32 <http://www.unicode.org/unicode/reports/tr27/>}.
33 """
34
35 import re
36 import logging
37 import pyxb.utils.utility
38
39 _log = logging.getLogger(__name__)
40
# Probe whether this interpreter can handle code points above U+FFFF in a
# regular expression character class.  On a build without wide unicode
# support the literal below becomes surrogate pairs and the range fails to
# compile.  Only ordinary exceptions are treated as "not supported";
# KeyboardInterrupt and SystemExit are allowed to propagate.
try:
    re.compile(u'[\U0001d7ce-\U0001d7ff]')
except Exception:
    SupportsWideUnicode = False
else:
    SupportsWideUnicode = True
47
48 import bisect
51 """Raised when some abuse of a L{CodePointSet} is detected."""
52 pass
53
54 @pyxb.utils.utility.BackfillComparisons
56 """Represent a set of Unicode code points.
57
58 Each code point is an integral value between 0 and 0x10FFFF. This
59 class is used to represent a set of code points in a manner
60 suitable for use as regular expression character sets."""
61
62 MaxCodePoint = 0x10FFFF
63 """The maximum value for a code point in the Unicode code point
64 space. This is normally 0xFFFF, because wide unicode characters
65 are generally not enabled in Python builds. If, however, they are
66 enabled, this will be the full value of 0x10FFFF."""
67
68 MaxShortCodePoint = 0xFFFF
69 if not SupportsWideUnicode:
70 MaxCodePoint = MaxShortCodePoint
71
72
73
74
75
76
77
78
79 __codepoints = None
80
82 """For testing purposes only, access to the codepoints
83 internal representation."""
84 return self.__codepoints
85
88
92
95
106
108
109
110 if isinstance(value, tuple):
111 (s, e) = value
112 e += 1
113 elif isinstance(value, basestring):
114 if 1 < len(value):
115 raise TypeError()
116 s = ord(value)
117 e = s+1
118 else:
119 s = int(value)
120 e = s+1
121 if s >= e:
122 raise ValueError('codepoint range value order')
123
124
125
126 if s > self.MaxCodePoint:
127 return self
128 if e > self.MaxCodePoint:
129 e = self.MaxCodePoint+1
130
131
132 li = bisect.bisect_left(self.__codepoints, s)
133
134 ri = bisect.bisect_right(self.__codepoints, e)
135
136 case = ((li & 1) << 1) | (ri & 1)
137 if not do_add:
138 case = 3 - case
139 if 0x03 == case:
140
141 del self.__codepoints[li:ri]
142 elif 0x02 == case:
143
144 del self.__codepoints[li+1:ri]
145 self.__codepoints[li] = e
146 elif 0x01 == case:
147
148 del self.__codepoints[li+1:ri]
149 self.__codepoints[li] = s
150 else:
151
152 self.__codepoints[li:ri] = [s, e]
153 return self
154
155 - def add (self, value):
156 """Add the given value to the code point set.
157
158 @param value: An integral value denoting a code point, or a
159 tuple C{(s,e)} denoting the start and end (inclusive) code
160 points in a range.
161 @return: C{self}"""
162 return self.__mutate(value, True)
163
165 """Add multiple values to a code point set.
166
167 @param values: Either a L{CodePointSet} instance, or an iterable
168 whose members are valid parameters to L{add}.
169
170 @return: C{self}"""
171 if isinstance(values, CodePointSet):
172 self.extend(values.asTuples())
173 else:
174 for v in values:
175 self.__mutate(v, True)
176 return self
177
179 """Remove the given value from the code point set.
180
181 @param value: An integral value denoting a code point, or a tuple
182 C{(s,e)} denoting the start and end (inclusive) code points in a
183 range, or a L{CodePointSet}.
184
185 @return: C{self}"""
186 if isinstance(value, CodePointSet):
187 for v in value.asTuples():
188 self.subtract(v)
189 return self
190 return self.__mutate(value, False)
191
192
193
194
195 __XMLtoPythonREMap = {
196 u'\x00': u'\\x00',
197
198
199 u'^': u'\\^',
200
201 u'\\': u'\\\\',
202 u'[': u'\\[',
203
204 u']': u'\\]',
205 u'-': u'\\-',
206 }
207
208
209
214
216 """Return the code point set as Unicode regular expression
217 character group consisting of a sequence of characters or
218 character ranges.
219
220 This returns a regular expression fragment using Python's
221 regular expression syntax. Note that different regular expression
222 syntaxes are not compatible, often in subtle ways.
223
224 @param with_brackets: If C{True} (default), square brackets
225 are added to enclose the returned character group."""
226 rva = []
227 if with_brackets:
228 rva.append(u'[')
229 for (s, e) in self.asTuples():
230 if s == e:
231 rva.append(self.__unichr(s))
232 else:
233 rva.extend([self.__unichr(s), '-', self.__unichr(e)])
234 if with_brackets:
235 rva.append(u']')
236 return u''.join(rva)
237
239 """Return the codepoints as tuples denoting the ranges that are in
240 the set.
241
242 Each tuple C{(s, e)} indicates that the code points from C{s}
243 (inclusive) to C{e}) (inclusive) are in the set."""
244
245 rv = []
246 start = None
247 for ri in xrange(len(self.__codepoints)):
248 if start is not None:
249 rv.append( (start, self.__codepoints[ri]-1) )
250 start = None
251 else:
252 start = self.__codepoints[ri]
253 if (start is not None) and (start <= self.MaxCodePoint):
254 rv.append( (start, self.MaxCodePoint) )
255 return rv
256
266
268 """If this set represents a single character, return it as its
269 unicode string value. Otherwise return C{None}."""
270 if (2 != len(self.__codepoints)) or (1 < (self.__codepoints[1] - self.__codepoints[0])):
271 return None
272 return unichr(self.__codepoints[0])
273
274 from pyxb.utils.unicode_data import PropertyMap
275 from pyxb.utils.unicode_data import BlockMap
278 """Regular expression support for XML Schema Data Types.
279
280 This class holds character classes and regular expressions used to
281 constrain the lexical space of XML Schema datatypes derived from
282 U{string<http://www.w3.org/TR/xmlschema-2/#string>}. They are
283 from U{XML 1.0 (Second
284 Edition)<http://www.w3.org/TR/2000/WD-xml-2e-20000814>} and
285 U{Namespaces in XML
286 <http://www.w3.org/TR/1999/REC-xml-names-19990114/>}.
287
288 Unlike the regular expressions used for pattern constraints in XML
289 Schema, which are derived from the Unicode 3.1 specification,
290 these are derived from the Unicode 2.0 specification.
291
292 The XML Schema definition refers explicitly to the second edition
293 of XML, so we have to use these code point sets and patterns. Be
294 aware that U{subsequent updates to the XML specification
295 <http://www.w3.org/XML/xml-V10-4e-errata#E09>} have changed the
296 corresponding patterns for other uses of XML. One significant
297 change is that the original specification, used here, does not
298 allow wide unicode characters."""
299
300 Char = CodePointSet(
301 0x0009,
302 0x000A,
303 0x000D,
304 ( 0x0020, 0xD7FF ),
305 ( 0xE000, 0xFFFD )
306 )
307 if SupportsWideUnicode:
308 Char.add( ( 1+CodePointSet.MaxShortCodePoint, CodePointSet.MaxCodePoint ) )
309
310 BaseChar = CodePointSet(
311 ( 0x0041, 0x005A ),
312 ( 0x0061, 0x007A ),
313 ( 0x00C0, 0x00D6 ),
314 ( 0x00D8, 0x00F6 ),
315 ( 0x00F8, 0x00FF ),
316 ( 0x0100, 0x0131 ),
317 ( 0x0134, 0x013E ),
318 ( 0x0141, 0x0148 ),
319 ( 0x014A, 0x017E ),
320 ( 0x0180, 0x01C3 ),
321 ( 0x01CD, 0x01F0 ),
322 ( 0x01F4, 0x01F5 ),
323 ( 0x01FA, 0x0217 ),
324 ( 0x0250, 0x02A8 ),
325 ( 0x02BB, 0x02C1 ),
326 0x0386,
327 ( 0x0388, 0x038A ),
328 0x038C,
329 ( 0x038E, 0x03A1 ),
330 ( 0x03A3, 0x03CE ),
331 ( 0x03D0, 0x03D6 ),
332 0x03DA,
333 0x03DC,
334 0x03DE,
335 0x03E0,
336 ( 0x03E2, 0x03F3 ),
337 ( 0x0401, 0x040C ),
338 ( 0x040E, 0x044F ),
339 ( 0x0451, 0x045C ),
340 ( 0x045E, 0x0481 ),
341 ( 0x0490, 0x04C4 ),
342 ( 0x04C7, 0x04C8 ),
343 ( 0x04CB, 0x04CC ),
344 ( 0x04D0, 0x04EB ),
345 ( 0x04EE, 0x04F5 ),
346 ( 0x04F8, 0x04F9 ),
347 ( 0x0531, 0x0556 ),
348 0x0559,
349 ( 0x0561, 0x0586 ),
350 ( 0x05D0, 0x05EA ),
351 ( 0x05F0, 0x05F2 ),
352 ( 0x0621, 0x063A ),
353 ( 0x0641, 0x064A ),
354 ( 0x0671, 0x06B7 ),
355 ( 0x06BA, 0x06BE ),
356 ( 0x06C0, 0x06CE ),
357 ( 0x06D0, 0x06D3 ),
358 0x06D5,
359 ( 0x06E5, 0x06E6 ),
360 ( 0x0905, 0x0939 ),
361 0x093D,
362 ( 0x0958, 0x0961 ),
363 ( 0x0985, 0x098C ),
364 ( 0x098F, 0x0990 ),
365 ( 0x0993, 0x09A8 ),
366 ( 0x09AA, 0x09B0 ),
367 0x09B2,
368 ( 0x09B6, 0x09B9 ),
369 ( 0x09DC, 0x09DD ),
370 ( 0x09DF, 0x09E1 ),
371 ( 0x09F0, 0x09F1 ),
372 ( 0x0A05, 0x0A0A ),
373 ( 0x0A0F, 0x0A10 ),
374 ( 0x0A13, 0x0A28 ),
375 ( 0x0A2A, 0x0A30 ),
376 ( 0x0A32, 0x0A33 ),
377 ( 0x0A35, 0x0A36 ),
378 ( 0x0A38, 0x0A39 ),
379 ( 0x0A59, 0x0A5C ),
380 0x0A5E,
381 ( 0x0A72, 0x0A74 ),
382 ( 0x0A85, 0x0A8B ),
383 0x0A8D,
384 ( 0x0A8F, 0x0A91 ),
385 ( 0x0A93, 0x0AA8 ),
386 ( 0x0AAA, 0x0AB0 ),
387 ( 0x0AB2, 0x0AB3 ),
388 ( 0x0AB5, 0x0AB9 ),
389 0x0ABD,
390 0x0AE0,
391 ( 0x0B05, 0x0B0C ),
392 ( 0x0B0F, 0x0B10 ),
393 ( 0x0B13, 0x0B28 ),
394 ( 0x0B2A, 0x0B30 ),
395 ( 0x0B32, 0x0B33 ),
396 ( 0x0B36, 0x0B39 ),
397 0x0B3D,
398 ( 0x0B5C, 0x0B5D ),
399 ( 0x0B5F, 0x0B61 ),
400 ( 0x0B85, 0x0B8A ),
401 ( 0x0B8E, 0x0B90 ),
402 ( 0x0B92, 0x0B95 ),
403 ( 0x0B99, 0x0B9A ),
404 0x0B9C,
405 ( 0x0B9E, 0x0B9F ),
406 ( 0x0BA3, 0x0BA4 ),
407 ( 0x0BA8, 0x0BAA ),
408 ( 0x0BAE, 0x0BB5 ),
409 ( 0x0BB7, 0x0BB9 ),
410 ( 0x0C05, 0x0C0C ),
411 ( 0x0C0E, 0x0C10 ),
412 ( 0x0C12, 0x0C28 ),
413 ( 0x0C2A, 0x0C33 ),
414 ( 0x0C35, 0x0C39 ),
415 ( 0x0C60, 0x0C61 ),
416 ( 0x0C85, 0x0C8C ),
417 ( 0x0C8E, 0x0C90 ),
418 ( 0x0C92, 0x0CA8 ),
419 ( 0x0CAA, 0x0CB3 ),
420 ( 0x0CB5, 0x0CB9 ),
421 0x0CDE,
422 ( 0x0CE0, 0x0CE1 ),
423 ( 0x0D05, 0x0D0C ),
424 ( 0x0D0E, 0x0D10 ),
425 ( 0x0D12, 0x0D28 ),
426 ( 0x0D2A, 0x0D39 ),
427 ( 0x0D60, 0x0D61 ),
428 ( 0x0E01, 0x0E2E ),
429 0x0E30,
430 ( 0x0E32, 0x0E33 ),
431 ( 0x0E40, 0x0E45 ),
432 ( 0x0E81, 0x0E82 ),
433 0x0E84,
434 ( 0x0E87, 0x0E88 ),
435 0x0E8A,
436 0x0E8D,
437 ( 0x0E94, 0x0E97 ),
438 ( 0x0E99, 0x0E9F ),
439 ( 0x0EA1, 0x0EA3 ),
440 0x0EA5,
441 0x0EA7,
442 ( 0x0EAA, 0x0EAB ),
443 ( 0x0EAD, 0x0EAE ),
444 0x0EB0,
445 ( 0x0EB2, 0x0EB3 ),
446 0x0EBD,
447 ( 0x0EC0, 0x0EC4 ),
448 ( 0x0F40, 0x0F47 ),
449 ( 0x0F49, 0x0F69 ),
450 ( 0x10A0, 0x10C5 ),
451 ( 0x10D0, 0x10F6 ),
452 0x1100,
453 ( 0x1102, 0x1103 ),
454 ( 0x1105, 0x1107 ),
455 0x1109,
456 ( 0x110B, 0x110C ),
457 ( 0x110E, 0x1112 ),
458 0x113C,
459 0x113E,
460 0x1140,
461 0x114C,
462 0x114E,
463 0x1150,
464 ( 0x1154, 0x1155 ),
465 0x1159,
466 ( 0x115F, 0x1161 ),
467 0x1163,
468 0x1165,
469 0x1167,
470 0x1169,
471 ( 0x116D, 0x116E ),
472 ( 0x1172, 0x1173 ),
473 0x1175,
474 0x119E,
475 0x11A8,
476 0x11AB,
477 ( 0x11AE, 0x11AF ),
478 ( 0x11B7, 0x11B8 ),
479 0x11BA,
480 ( 0x11BC, 0x11C2 ),
481 0x11EB,
482 0x11F0,
483 0x11F9,
484 ( 0x1E00, 0x1E9B ),
485 ( 0x1EA0, 0x1EF9 ),
486 ( 0x1F00, 0x1F15 ),
487 ( 0x1F18, 0x1F1D ),
488 ( 0x1F20, 0x1F45 ),
489 ( 0x1F48, 0x1F4D ),
490 ( 0x1F50, 0x1F57 ),
491 0x1F59,
492 0x1F5B,
493 0x1F5D,
494 ( 0x1F5F, 0x1F7D ),
495 ( 0x1F80, 0x1FB4 ),
496 ( 0x1FB6, 0x1FBC ),
497 0x1FBE,
498 ( 0x1FC2, 0x1FC4 ),
499 ( 0x1FC6, 0x1FCC ),
500 ( 0x1FD0, 0x1FD3 ),
501 ( 0x1FD6, 0x1FDB ),
502 ( 0x1FE0, 0x1FEC ),
503 ( 0x1FF2, 0x1FF4 ),
504 ( 0x1FF6, 0x1FFC ),
505 0x2126,
506 ( 0x212A, 0x212B ),
507 0x212E,
508 ( 0x2180, 0x2182 ),
509 ( 0x3041, 0x3094 ),
510 ( 0x30A1, 0x30FA ),
511 ( 0x3105, 0x312C ),
512 ( 0xAC00, 0xD7A3 )
513 )
514
515 Ideographic = CodePointSet(
516 ( 0x4E00, 0x9FA5 ),
517 0x3007,
518 ( 0x3021, 0x3029 )
519 )
520
521 Letter = CodePointSet(BaseChar).extend(Ideographic)
522
523 CombiningChar = CodePointSet(
524 ( 0x0300, 0x0345 ),
525 ( 0x0360, 0x0361 ),
526 ( 0x0483, 0x0486 ),
527 ( 0x0591, 0x05A1 ),
528 ( 0x05A3, 0x05B9 ),
529 ( 0x05BB, 0x05BD ),
530 0x05BF,
531 ( 0x05C1, 0x05C2 ),
532 0x05C4,
533 ( 0x064B, 0x0652 ),
534 0x0670,
535 ( 0x06D6, 0x06DC ),
536 ( 0x06DD, 0x06DF ),
537 ( 0x06E0, 0x06E4 ),
538 ( 0x06E7, 0x06E8 ),
539 ( 0x06EA, 0x06ED ),
540 ( 0x0901, 0x0903 ),
541 0x093C,
542 ( 0x093E, 0x094C ),
543 0x094D,
544 ( 0x0951, 0x0954 ),
545 ( 0x0962, 0x0963 ),
546 ( 0x0981, 0x0983 ),
547 0x09BC,
548 0x09BE,
549 0x09BF,
550 ( 0x09C0, 0x09C4 ),
551 ( 0x09C7, 0x09C8 ),
552 ( 0x09CB, 0x09CD ),
553 0x09D7,
554 ( 0x09E2, 0x09E3 ),
555 0x0A02,
556 0x0A3C,
557 0x0A3E,
558 0x0A3F,
559 ( 0x0A40, 0x0A42 ),
560 ( 0x0A47, 0x0A48 ),
561 ( 0x0A4B, 0x0A4D ),
562 ( 0x0A70, 0x0A71 ),
563 ( 0x0A81, 0x0A83 ),
564 0x0ABC,
565 ( 0x0ABE, 0x0AC5 ),
566 ( 0x0AC7, 0x0AC9 ),
567 ( 0x0ACB, 0x0ACD ),
568 ( 0x0B01, 0x0B03 ),
569 0x0B3C,
570 ( 0x0B3E, 0x0B43 ),
571 ( 0x0B47, 0x0B48 ),
572 ( 0x0B4B, 0x0B4D ),
573 ( 0x0B56, 0x0B57 ),
574 ( 0x0B82, 0x0B83 ),
575 ( 0x0BBE, 0x0BC2 ),
576 ( 0x0BC6, 0x0BC8 ),
577 ( 0x0BCA, 0x0BCD ),
578 0x0BD7,
579 ( 0x0C01, 0x0C03 ),
580 ( 0x0C3E, 0x0C44 ),
581 ( 0x0C46, 0x0C48 ),
582 ( 0x0C4A, 0x0C4D ),
583 ( 0x0C55, 0x0C56 ),
584 ( 0x0C82, 0x0C83 ),
585 ( 0x0CBE, 0x0CC4 ),
586 ( 0x0CC6, 0x0CC8 ),
587 ( 0x0CCA, 0x0CCD ),
588 ( 0x0CD5, 0x0CD6 ),
589 ( 0x0D02, 0x0D03 ),
590 ( 0x0D3E, 0x0D43 ),
591 ( 0x0D46, 0x0D48 ),
592 ( 0x0D4A, 0x0D4D ),
593 0x0D57,
594 0x0E31,
595 ( 0x0E34, 0x0E3A ),
596 ( 0x0E47, 0x0E4E ),
597 0x0EB1,
598 ( 0x0EB4, 0x0EB9 ),
599 ( 0x0EBB, 0x0EBC ),
600 ( 0x0EC8, 0x0ECD ),
601 ( 0x0F18, 0x0F19 ),
602 0x0F35,
603 0x0F37,
604 0x0F39,
605 0x0F3E,
606 0x0F3F,
607 ( 0x0F71, 0x0F84 ),
608 ( 0x0F86, 0x0F8B ),
609 ( 0x0F90, 0x0F95 ),
610 0x0F97,
611 ( 0x0F99, 0x0FAD ),
612 ( 0x0FB1, 0x0FB7 ),
613 0x0FB9,
614 ( 0x20D0, 0x20DC ),
615 0x20E1,
616 ( 0x302A, 0x302F ),
617 0x3099,
618 0x309A
619 )
620
621 Digit = CodePointSet(
622 ( 0x0030, 0x0039 ),
623 ( 0x0660, 0x0669 ),
624 ( 0x06F0, 0x06F9 ),
625 ( 0x0966, 0x096F ),
626 ( 0x09E6, 0x09EF ),
627 ( 0x0A66, 0x0A6F ),
628 ( 0x0AE6, 0x0AEF ),
629 ( 0x0B66, 0x0B6F ),
630 ( 0x0BE7, 0x0BEF ),
631 ( 0x0C66, 0x0C6F ),
632 ( 0x0CE6, 0x0CEF ),
633 ( 0x0D66, 0x0D6F ),
634 ( 0x0E50, 0x0E59 ),
635 ( 0x0ED0, 0x0ED9 ),
636 ( 0x0F20, 0x0F29 )
637 )
638
639 Extender = CodePointSet(
640 0x00B7,
641 0x02D0,
642 0x02D1,
643 0x0387,
644 0x0640,
645 0x0E46,
646 0x0EC6,
647 0x3005,
648 ( 0x3031, 0x3035 ),
649 ( 0x309D, 0x309E ),
650 ( 0x30FC, 0x30FE )
651 )
652
653
654 NameStartChar = CodePointSet(Letter)
655 NameStartChar.add(ord('_'))
656 NameStartChar.add(ord(':'))
657
658 NCNameStartChar = CodePointSet(Letter)
659 NCNameStartChar.add(ord('_'))
660
661 NameChar = CodePointSet(Letter)
662 NameChar.extend(Digit)
663 NameChar.add(ord('.'))
664 NameChar.add(ord('-'))
665 NameChar.add(ord('_'))
666 NameChar.add(ord(':'))
667 NameChar.extend(CombiningChar)
668 NameChar.extend(Extender)
669
670 NCNameChar = CodePointSet(Letter)
671 NCNameChar.extend(Digit)
672 NCNameChar.add(ord('.'))
673 NCNameChar.add(ord('-'))
674 NCNameChar.add(ord('_'))
675 NCNameChar.extend(CombiningChar)
676 NCNameChar.extend(Extender)
677
# Pattern fragments (*_pat) and anchored, compiled expressions (*_re)
# for the Name, Nmtoken, NCName and QName lexical forms, built from the
# code point sets above.
#
# The compiled forms are anchored with \Z rather than $: in Python "$"
# also matches just before a newline that ends the string, so '^...$'
# would accept u'name\n' as a valid name.
Name_pat = '%s%s*' % (NameStartChar.asPattern(), NameChar.asPattern())
Name_re = re.compile(r'^%s\Z' % (Name_pat,))
NmToken_pat = '%s+' % (NameChar.asPattern(),)
NmToken_re = re.compile(r'^%s\Z' % (NmToken_pat,))
NCName_pat = '%s%s*' % (NCNameStartChar.asPattern(), NCNameChar.asPattern())
NCName_re = re.compile(r'^%s\Z' % (NCName_pat,))
# A QName is an NCName optionally preceded by an NCName prefix and a colon.
QName_pat = '(%s:)?%s' % (NCName_pat, NCName_pat)
QName_re = re.compile(r'^%s\Z' % (QName_pat,))
686
687
# Map from the character following a backslash in a single-character
# escape to the one-member code point set it denotes.  The three
# control-character shorthands come first; every other escapable
# character simply stands for itself.
SingleCharEsc = {
    'n' : CodePointSet(0x0A),
    'r' : CodePointSet(0x0D),
    't' : CodePointSet(0x09),
}
for c in r'\|.-^?*+{}()[]':
    SingleCharEsc[c] = CodePointSet(ord(c))
693
694
695
# Category escapes: for every property name in PropertyMap register both
# the positive form p{Name} and the complemented form P{Name}.
#
# NOTE(review): complEsc is created here but nothing in the code visible
# in this section ever stores into it; the P{...} complements are kept in
# catEsc alongside the p{...} entries.  Confirm whether any consumer
# expects complEsc to be populated.
catEsc = { }
complEsc = { }
for k, v in PropertyMap.iteritems():
    catEsc[u'p{%s}' % (k,)] = v
    catEsc[u'P{%s}' % (k,)] = v.negate()
701
702
# Block escapes: for every block name in BlockMap register the positive
# form p{IsName} and the complemented form P{IsName}.
IsBlockEsc = { }
for k, v in BlockMap.iteritems():
    IsBlockEsc[u'p{Is%s}' % (k,)] = v
    IsBlockEsc[u'P{Is%s}' % (k,)] = v.negate()
707
708
# The wildcard matches every code point except line feed and carriage
# return.
WildcardEsc = CodePointSet(ord('\n'), ord('\r')).negate()

# Multi-character escapes, keyed by the letter that follows the
# backslash.  Each lower-case/upper-case pair is a set and its
# complement; for w/W the upper-case set is built first and the
# lower-case one is derived from it.
MultiCharEsc = { }

# s / S: space, tab, line feed, carriage return -- and everything else.
MultiCharEsc['s'] = CodePointSet(0x20, ord('\t'), ord('\n'), ord('\r'))
MultiCharEsc['S'] = MultiCharEsc['s'].negate()

# i / I: XML 1.0 letters plus '_' and ':' -- and everything else.
MultiCharEsc['i'] = CodePointSet(XML1p0e2.Letter).add(ord('_')).add(ord(':'))
MultiCharEsc['I'] = MultiCharEsc['i'].negate()

# c / C: the XML 1.0 NameChar set -- and everything else.
MultiCharEsc['c'] = CodePointSet(XML1p0e2.NameChar)
MultiCharEsc['C'] = MultiCharEsc['c'].negate()

# d / D: the Nd property set (stored as-is, not copied) -- and
# everything else.
MultiCharEsc['d'] = PropertyMap['Nd']
MultiCharEsc['D'] = MultiCharEsc['d'].negate()

# W / w: the union of the P, Z and C property sets -- and everything
# else.
MultiCharEsc['W'] = CodePointSet(PropertyMap['P']).extend(PropertyMap['Z']).extend(PropertyMap['C'])
MultiCharEsc['w'] = MultiCharEsc['W'].negate()
721