1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module contains support for Unicode characters as required to
17 support the regular expression syntax defined in U{annex F
18 <http://www.w3.org/TR/xmlschema-2/#regexs>}
19 of the XML Schema definition.
20
21 In particular, we need to be able to identify character properties and
22 block escapes, as defined in F.1.1, by name.
23
24 - Block data: U{http://www.unicode.org/Public/3.1-Update/Blocks-4.txt}
25 - Property list data: U{http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.txt}
26 - Full dataset: U{http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt}
27
28 The Unicode database active at the time XML Schema 1.0 was defined is
29 archived at
30 U{http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html},
31 and refers to U{Unicode Standard Annex #27: Unicode 3.1
32 <http://www.unicode.org/unicode/reports/tr27/>}.
33 """
34
35 import re
36 import logging
37
38 _log = logging.getLogger(__name__)
39
# Probe whether the regular expression engine of this interpreter accepts a
# character range whose end points lie above U+FFFF.  The result is recorded
# in SupportsWideUnicode for the rest of the module.
try:
    re.compile(u'[\U0001d7ce-\U0001d7ff]')
except re.error:
    # Only a pattern-compilation failure is taken to mean "no wide
    # characters"; unrelated exceptions (KeyboardInterrupt, SystemExit,
    # MemoryError, ...) are no longer silently discarded.
    SupportsWideUnicode = False
else:
    SupportsWideUnicode = True
46
47 import bisect
48
50 """Raised when some abuse of a L{CodePointSet} is detected."""
51 pass
52
54 """Represent a set of Unicode code points.
55
56 Each code point is an integral value between 0 and 0x10FFFF. This
57 class is used to represent a set of code points in a manner
58 suitable for use as regular expression character sets."""
59
60 MaxCodePoint = 0x10FFFF
61 """The maximum value for a code point in the Unicode code point
62 space. This is normally 0xFFFF, because wide unicode characters
63 are generally not enabled in Python builds. If, however, they are
64 enabled, this will be the full value of 0x10FFFF."""
65
66 MaxShortCodePoint = 0xFFFF
67 if not SupportsWideUnicode:
68 MaxCodePoint = MaxShortCodePoint
69
70
71
72
73
74
75
76
77 __codepoints = None
78
80 """For testing purposes only, access to the codepoints
81 internal representation."""
82 return self.__codepoints
83
87
98
100
101
102 if isinstance(value, tuple):
103 (s, e) = value
104 e += 1
105 elif isinstance(value, basestring):
106 if 1 < len(value):
107 raise TypeError()
108 s = ord(value)
109 e = s+1
110 else:
111 s = int(value)
112 e = s+1
113 if s > e:
114 raise ValueError('codepoint range value order')
115
116
117
118 if s > self.MaxCodePoint:
119 return self
120 if e > self.MaxCodePoint:
121 e = self.MaxCodePoint
122 e = min(e, self.MaxCodePoint)
123
124
125 li = bisect.bisect_left(self.__codepoints, s)
126
127 ri = bisect.bisect_right(self.__codepoints, e)
128
129 case = ((li & 1) << 1) | (ri & 1)
130 if not do_add:
131 case = 3 - case
132 if 0x03 == case:
133
134 del self.__codepoints[li:ri]
135 elif 0x02 == case:
136
137 del self.__codepoints[li+1:ri]
138 self.__codepoints[li] = e
139 elif 0x01 == case:
140
141 del self.__codepoints[li+1:ri]
142 self.__codepoints[li] = s
143 else:
144
145 self.__codepoints[li:ri] = [s, e]
146 return self
147
def add (self, value):
    """Add the given value to the code point set.

    This is a thin wrapper: it forwards C{value} to the private
    mutator with the add/remove flag set to C{True} and returns
    whatever that call returns.

    @param value: An integral value denoting a code point, or a
    tuple C{(s,e)} denoting the start and end (inclusive) code
    points in a range.  (The mutator also appears to accept a
    single-character string; confirm before relying on it.)
    @return: C{self}"""
    return self.__mutate(value, True)
156
158 """Add multiple values to a code point set.
159
160 @param values: Either a L{CodePointSet} instance, or an iterable
161 whose members are valid parameters to L{add}.
162
163 @return: C{self}"""
164 if isinstance(values, CodePointSet):
165 self.extend(values.asTuples())
166 else:
167 for v in values:
168 self.__mutate(v, True)
169 return self
170
172 """Remove the given value from the code point set.
173
174 @param value: An integral value denoting a code point, or a tuple
175 C{(s,e)} denoting the start and end (inclusive) code points in a
176 range, or a L{CodePointSet}.
177
178 @return: C{self}"""
179 if isinstance(value, CodePointSet):
180 for v in value.asTuples():
181 self.subtract(v)
182 return self
183 return self.__mutate(value, False)
184
185
186
187
188 __XMLtoPythonREMap = {
189 u'\x00': u'\\x00',
190
191
192 u'^': u'\\^',
193
194 u'\\': u'\\\\',
195 u'[': u'\\[',
196
197 u']': u'\\]',
198 u'-': u'\\-',
199 }
200
201
202
207
209 """Return the code point set as Unicode regular expression
210 character group consisting of a sequence of characters or
211 character ranges.
212
213 This returns a regular expression fragment using Python's
214 regular expression syntax. Note that different regular expression
215 syntaxes are not compatible, often in subtle ways.
216
217 @param with_brackets: If C{True} (default), square brackets
218 are added to enclose the returned character group."""
219 rva = []
220 if with_brackets:
221 rva.append(u'[')
222 for (s, e) in self.asTuples():
223 if s == e:
224 rva.append(self.__unichr(s))
225 else:
226 rva.extend([self.__unichr(s), '-', self.__unichr(e)])
227 if with_brackets:
228 rva.append(u']')
229 return u''.join(rva)
230
232 """Return the codepoints as tuples denoting the ranges that are in
233 the set.
234
235 Each tuple C{(s, e)} indicates that the code points from C{s}
236 (inclusive) to C{e} (inclusive) are in the set."""
237
238 rv = []
239 start = None
240 for ri in xrange(len(self.__codepoints)):
241 if start is not None:
242 rv.append( (start, self.__codepoints[ri]-1) )
243 start = None
244 else:
245 start = self.__codepoints[ri]
246 if start is not None:
247 rv.append( (start, self.MaxCodePoint) )
248 return rv
249
259
261 """If this set represents a single character, return it as its
262 unicode string value. Otherwise return C{None}."""
263 if (2 != len(self.__codepoints)) or (1 < (self.__codepoints[1] - self.__codepoints[0])):
264 return None
265 return unichr(self.__codepoints[0])
266
267 from pyxb.utils.unicode_data import PropertyMap
268 from pyxb.utils.unicode_data import BlockMap
269
271 """Regular expression support for XML Schema Data Types.
272
273 This class holds character classes and regular expressions used to
274 constrain the lexical space of XML Schema datatypes derived from
275 U{string<http://www.w3.org/TR/xmlschema-2/#string>}. They are
276 from U{XML 1.0 (Second
277 Edition)<http://www.w3.org/TR/2000/WD-xml-2e-20000814>} and
278 U{Namespaces in XML
279 <http://www.w3.org/TR/1999/REC-xml-names-19990114/>}.
280
281 Unlike the regular expressions used for pattern constraints in XML
282 Schema, which are derived from the Unicode 3.1 specification,
283 these are derived from the Unicode 2.0 specification.
284
285 The XML Schema definition refers explicitly to the second edition
286 of XML, so we have to use these code point sets and patterns. Be
287 aware that U{subsequent updates to the XML specification
288 <http://www.w3.org/XML/xml-V10-4e-errata#E09>} have changed the
289 corresponding patterns for other uses of XML. One significant
290 change is that the original specification, used here, does not
291 allow wide unicode characters."""
292
# The set named Char: three control characters plus two ranges below
# U+FFFE, extended with the supplementary range when the interpreter
# supports wide characters.
Char = CodePointSet(
    0x0009,
    0x000A,
    0x000D,
    ( 0x0020, 0xD7FF ),
    ( 0xE000, 0xFFFD )
    )
if SupportsWideUnicode:
    # A single (start, end) range must go through add().  extend() iterates
    # its argument, so passing the tuple there would add only the two code
    # points 0x10000 and 0x10FFFF instead of everything between them.
    Char.add( ( 0x10000, 0x10FFFF ) )
302
303 BaseChar = CodePointSet(
304 ( 0x0041, 0x005A ),
305 ( 0x0061, 0x007A ),
306 ( 0x00C0, 0x00D6 ),
307 ( 0x00D8, 0x00F6 ),
308 ( 0x00F8, 0x00FF ),
309 ( 0x0100, 0x0131 ),
310 ( 0x0134, 0x013E ),
311 ( 0x0141, 0x0148 ),
312 ( 0x014A, 0x017E ),
313 ( 0x0180, 0x01C3 ),
314 ( 0x01CD, 0x01F0 ),
315 ( 0x01F4, 0x01F5 ),
316 ( 0x01FA, 0x0217 ),
317 ( 0x0250, 0x02A8 ),
318 ( 0x02BB, 0x02C1 ),
319 0x0386,
320 ( 0x0388, 0x038A ),
321 0x038C,
322 ( 0x038E, 0x03A1 ),
323 ( 0x03A3, 0x03CE ),
324 ( 0x03D0, 0x03D6 ),
325 0x03DA,
326 0x03DC,
327 0x03DE,
328 0x03E0,
329 ( 0x03E2, 0x03F3 ),
330 ( 0x0401, 0x040C ),
331 ( 0x040E, 0x044F ),
332 ( 0x0451, 0x045C ),
333 ( 0x045E, 0x0481 ),
334 ( 0x0490, 0x04C4 ),
335 ( 0x04C7, 0x04C8 ),
336 ( 0x04CB, 0x04CC ),
337 ( 0x04D0, 0x04EB ),
338 ( 0x04EE, 0x04F5 ),
339 ( 0x04F8, 0x04F9 ),
340 ( 0x0531, 0x0556 ),
341 0x0559,
342 ( 0x0561, 0x0586 ),
343 ( 0x05D0, 0x05EA ),
344 ( 0x05F0, 0x05F2 ),
345 ( 0x0621, 0x063A ),
346 ( 0x0641, 0x064A ),
347 ( 0x0671, 0x06B7 ),
348 ( 0x06BA, 0x06BE ),
349 ( 0x06C0, 0x06CE ),
350 ( 0x06D0, 0x06D3 ),
351 0x06D5,
352 ( 0x06E5, 0x06E6 ),
353 ( 0x0905, 0x0939 ),
354 0x093D,
355 ( 0x0958, 0x0961 ),
356 ( 0x0985, 0x098C ),
357 ( 0x098F, 0x0990 ),
358 ( 0x0993, 0x09A8 ),
359 ( 0x09AA, 0x09B0 ),
360 0x09B2,
361 ( 0x09B6, 0x09B9 ),
362 ( 0x09DC, 0x09DD ),
363 ( 0x09DF, 0x09E1 ),
364 ( 0x09F0, 0x09F1 ),
365 ( 0x0A05, 0x0A0A ),
366 ( 0x0A0F, 0x0A10 ),
367 ( 0x0A13, 0x0A28 ),
368 ( 0x0A2A, 0x0A30 ),
369 ( 0x0A32, 0x0A33 ),
370 ( 0x0A35, 0x0A36 ),
371 ( 0x0A38, 0x0A39 ),
372 ( 0x0A59, 0x0A5C ),
373 0x0A5E,
374 ( 0x0A72, 0x0A74 ),
375 ( 0x0A85, 0x0A8B ),
376 0x0A8D,
377 ( 0x0A8F, 0x0A91 ),
378 ( 0x0A93, 0x0AA8 ),
379 ( 0x0AAA, 0x0AB0 ),
380 ( 0x0AB2, 0x0AB3 ),
381 ( 0x0AB5, 0x0AB9 ),
382 0x0ABD,
383 0x0AE0,
384 ( 0x0B05, 0x0B0C ),
385 ( 0x0B0F, 0x0B10 ),
386 ( 0x0B13, 0x0B28 ),
387 ( 0x0B2A, 0x0B30 ),
388 ( 0x0B32, 0x0B33 ),
389 ( 0x0B36, 0x0B39 ),
390 0x0B3D,
391 ( 0x0B5C, 0x0B5D ),
392 ( 0x0B5F, 0x0B61 ),
393 ( 0x0B85, 0x0B8A ),
394 ( 0x0B8E, 0x0B90 ),
395 ( 0x0B92, 0x0B95 ),
396 ( 0x0B99, 0x0B9A ),
397 0x0B9C,
398 ( 0x0B9E, 0x0B9F ),
399 ( 0x0BA3, 0x0BA4 ),
400 ( 0x0BA8, 0x0BAA ),
401 ( 0x0BAE, 0x0BB5 ),
402 ( 0x0BB7, 0x0BB9 ),
403 ( 0x0C05, 0x0C0C ),
404 ( 0x0C0E, 0x0C10 ),
405 ( 0x0C12, 0x0C28 ),
406 ( 0x0C2A, 0x0C33 ),
407 ( 0x0C35, 0x0C39 ),
408 ( 0x0C60, 0x0C61 ),
409 ( 0x0C85, 0x0C8C ),
410 ( 0x0C8E, 0x0C90 ),
411 ( 0x0C92, 0x0CA8 ),
412 ( 0x0CAA, 0x0CB3 ),
413 ( 0x0CB5, 0x0CB9 ),
414 0x0CDE,
415 ( 0x0CE0, 0x0CE1 ),
416 ( 0x0D05, 0x0D0C ),
417 ( 0x0D0E, 0x0D10 ),
418 ( 0x0D12, 0x0D28 ),
419 ( 0x0D2A, 0x0D39 ),
420 ( 0x0D60, 0x0D61 ),
421 ( 0x0E01, 0x0E2E ),
422 0x0E30,
423 ( 0x0E32, 0x0E33 ),
424 ( 0x0E40, 0x0E45 ),
425 ( 0x0E81, 0x0E82 ),
426 0x0E84,
427 ( 0x0E87, 0x0E88 ),
428 0x0E8A,
429 0x0E8D,
430 ( 0x0E94, 0x0E97 ),
431 ( 0x0E99, 0x0E9F ),
432 ( 0x0EA1, 0x0EA3 ),
433 0x0EA5,
434 0x0EA7,
435 ( 0x0EAA, 0x0EAB ),
436 ( 0x0EAD, 0x0EAE ),
437 0x0EB0,
438 ( 0x0EB2, 0x0EB3 ),
439 0x0EBD,
440 ( 0x0EC0, 0x0EC4 ),
441 ( 0x0F40, 0x0F47 ),
442 ( 0x0F49, 0x0F69 ),
443 ( 0x10A0, 0x10C5 ),
444 ( 0x10D0, 0x10F6 ),
445 0x1100,
446 ( 0x1102, 0x1103 ),
447 ( 0x1105, 0x1107 ),
448 0x1109,
449 ( 0x110B, 0x110C ),
450 ( 0x110E, 0x1112 ),
451 0x113C,
452 0x113E,
453 0x1140,
454 0x114C,
455 0x114E,
456 0x1150,
457 ( 0x1154, 0x1155 ),
458 0x1159,
459 ( 0x115F, 0x1161 ),
460 0x1163,
461 0x1165,
462 0x1167,
463 0x1169,
464 ( 0x116D, 0x116E ),
465 ( 0x1172, 0x1173 ),
466 0x1175,
467 0x119E,
468 0x11A8,
469 0x11AB,
470 ( 0x11AE, 0x11AF ),
471 ( 0x11B7, 0x11B8 ),
472 0x11BA,
473 ( 0x11BC, 0x11C2 ),
474 0x11EB,
475 0x11F0,
476 0x11F9,
477 ( 0x1E00, 0x1E9B ),
478 ( 0x1EA0, 0x1EF9 ),
479 ( 0x1F00, 0x1F15 ),
480 ( 0x1F18, 0x1F1D ),
481 ( 0x1F20, 0x1F45 ),
482 ( 0x1F48, 0x1F4D ),
483 ( 0x1F50, 0x1F57 ),
484 0x1F59,
485 0x1F5B,
486 0x1F5D,
487 ( 0x1F5F, 0x1F7D ),
488 ( 0x1F80, 0x1FB4 ),
489 ( 0x1FB6, 0x1FBC ),
490 0x1FBE,
491 ( 0x1FC2, 0x1FC4 ),
492 ( 0x1FC6, 0x1FCC ),
493 ( 0x1FD0, 0x1FD3 ),
494 ( 0x1FD6, 0x1FDB ),
495 ( 0x1FE0, 0x1FEC ),
496 ( 0x1FF2, 0x1FF4 ),
497 ( 0x1FF6, 0x1FFC ),
498 0x2126,
499 ( 0x212A, 0x212B ),
500 0x212E,
501 ( 0x2180, 0x2182 ),
502 ( 0x3041, 0x3094 ),
503 ( 0x30A1, 0x30FA ),
504 ( 0x3105, 0x312C ),
505 ( 0xAC00, 0xD7A3 )
506 )
507
508 Ideographic = CodePointSet(
509 ( 0x4E00, 0x9FA5 ),
510 0x3007,
511 ( 0x3021, 0x3029 )
512 )
513
514 Letter = CodePointSet(BaseChar).extend(Ideographic)
515
516 CombiningChar = CodePointSet(
517 ( 0x0300, 0x0345 ),
518 ( 0x0360, 0x0361 ),
519 ( 0x0483, 0x0486 ),
520 ( 0x0591, 0x05A1 ),
521 ( 0x05A3, 0x05B9 ),
522 ( 0x05BB, 0x05BD ),
523 0x05BF,
524 ( 0x05C1, 0x05C2 ),
525 0x05C4,
526 ( 0x064B, 0x0652 ),
527 0x0670,
528 ( 0x06D6, 0x06DC ),
529 ( 0x06DD, 0x06DF ),
530 ( 0x06E0, 0x06E4 ),
531 ( 0x06E7, 0x06E8 ),
532 ( 0x06EA, 0x06ED ),
533 ( 0x0901, 0x0903 ),
534 0x093C,
535 ( 0x093E, 0x094C ),
536 0x094D,
537 ( 0x0951, 0x0954 ),
538 ( 0x0962, 0x0963 ),
539 ( 0x0981, 0x0983 ),
540 0x09BC,
541 0x09BE,
542 0x09BF,
543 ( 0x09C0, 0x09C4 ),
544 ( 0x09C7, 0x09C8 ),
545 ( 0x09CB, 0x09CD ),
546 0x09D7,
547 ( 0x09E2, 0x09E3 ),
548 0x0A02,
549 0x0A3C,
550 0x0A3E,
551 0x0A3F,
552 ( 0x0A40, 0x0A42 ),
553 ( 0x0A47, 0x0A48 ),
554 ( 0x0A4B, 0x0A4D ),
555 ( 0x0A70, 0x0A71 ),
556 ( 0x0A81, 0x0A83 ),
557 0x0ABC,
558 ( 0x0ABE, 0x0AC5 ),
559 ( 0x0AC7, 0x0AC9 ),
560 ( 0x0ACB, 0x0ACD ),
561 ( 0x0B01, 0x0B03 ),
562 0x0B3C,
563 ( 0x0B3E, 0x0B43 ),
564 ( 0x0B47, 0x0B48 ),
565 ( 0x0B4B, 0x0B4D ),
566 ( 0x0B56, 0x0B57 ),
567 ( 0x0B82, 0x0B83 ),
568 ( 0x0BBE, 0x0BC2 ),
569 ( 0x0BC6, 0x0BC8 ),
570 ( 0x0BCA, 0x0BCD ),
571 0x0BD7,
572 ( 0x0C01, 0x0C03 ),
573 ( 0x0C3E, 0x0C44 ),
574 ( 0x0C46, 0x0C48 ),
575 ( 0x0C4A, 0x0C4D ),
576 ( 0x0C55, 0x0C56 ),
577 ( 0x0C82, 0x0C83 ),
578 ( 0x0CBE, 0x0CC4 ),
579 ( 0x0CC6, 0x0CC8 ),
580 ( 0x0CCA, 0x0CCD ),
581 ( 0x0CD5, 0x0CD6 ),
582 ( 0x0D02, 0x0D03 ),
583 ( 0x0D3E, 0x0D43 ),
584 ( 0x0D46, 0x0D48 ),
585 ( 0x0D4A, 0x0D4D ),
586 0x0D57,
587 0x0E31,
588 ( 0x0E34, 0x0E3A ),
589 ( 0x0E47, 0x0E4E ),
590 0x0EB1,
591 ( 0x0EB4, 0x0EB9 ),
592 ( 0x0EBB, 0x0EBC ),
593 ( 0x0EC8, 0x0ECD ),
594 ( 0x0F18, 0x0F19 ),
595 0x0F35,
596 0x0F37,
597 0x0F39,
598 0x0F3E,
599 0x0F3F,
600 ( 0x0F71, 0x0F84 ),
601 ( 0x0F86, 0x0F8B ),
602 ( 0x0F90, 0x0F95 ),
603 0x0F97,
604 ( 0x0F99, 0x0FAD ),
605 ( 0x0FB1, 0x0FB7 ),
606 0x0FB9,
607 ( 0x20D0, 0x20DC ),
608 0x20E1,
609 ( 0x302A, 0x302F ),
610 0x3099,
611 0x309A
612 )
613
614 Digit = CodePointSet(
615 ( 0x0030, 0x0039 ),
616 ( 0x0660, 0x0669 ),
617 ( 0x06F0, 0x06F9 ),
618 ( 0x0966, 0x096F ),
619 ( 0x09E6, 0x09EF ),
620 ( 0x0A66, 0x0A6F ),
621 ( 0x0AE6, 0x0AEF ),
622 ( 0x0B66, 0x0B6F ),
623 ( 0x0BE7, 0x0BEF ),
624 ( 0x0C66, 0x0C6F ),
625 ( 0x0CE6, 0x0CEF ),
626 ( 0x0D66, 0x0D6F ),
627 ( 0x0E50, 0x0E59 ),
628 ( 0x0ED0, 0x0ED9 ),
629 ( 0x0F20, 0x0F29 )
630 )
631
632 Extender = CodePointSet(
633 0x00B7,
634 0x02D0,
635 0x02D1,
636 0x0387,
637 0x0640,
638 0x0E46,
639 0x0EC6,
640 0x3005,
641 ( 0x3031, 0x3035 ),
642 ( 0x309D, 0x309E ),
643 ( 0x30FC, 0x30FE )
644 )
645
646
# Each set below starts from a fresh copy of Letter and is grown in place;
# add() and extend() both return the set, so the steps can be chained.

# Letters plus underscore and colon.
NameStartChar = CodePointSet(Letter).add(ord('_')).add(ord(':'))

# Same as NameStartChar, but without the colon.
NCNameStartChar = CodePointSet(Letter).add(ord('_'))

# Letters, digits, the punctuation '.', '-', '_' and ':', combining
# characters and extenders.
NameChar = (CodePointSet(Letter)
            .extend(Digit)
            .add(ord('.'))
            .add(ord('-'))
            .add(ord('_'))
            .add(ord(':'))
            .extend(CombiningChar)
            .extend(Extender))

# Same as NameChar, but without the colon.
NCNameChar = (CodePointSet(Letter)
              .extend(Digit)
              .add(ord('.'))
              .add(ord('-'))
              .add(ord('_'))
              .extend(CombiningChar)
              .extend(Extender))
670
# Pattern fragments (``*_pat``) and fully anchored compiled expressions
# (``*_re``) for the name-like lexical forms built from the sets above.
#
# The compiled forms end with \Z rather than $: in Python, $ also matches
# just before a newline that terminates the string, which would let a value
# such as "name\n" be accepted even though newline is in none of the sets.
Name_pat = '%s%s*' % (NameStartChar.asPattern(), NameChar.asPattern())
Name_re = re.compile(r'^%s\Z' % (Name_pat,))
NmToken_pat = '%s+' % (NameChar.asPattern(),)
NmToken_re = re.compile(r'^%s\Z' % (NmToken_pat,))
NCName_pat = '%s%s*' % (NCNameStartChar.asPattern(), NCNameChar.asPattern())
NCName_re = re.compile(r'^%s\Z' % (NCName_pat,))
# An optional "prefix:" followed by a local part, both colon-free names.
QName_pat = '(%s:)?%s' % (NCName_pat, NCName_pat)
QName_re = re.compile(r'^%s\Z' % (QName_pat,))
679
680
# Map from the character that follows a backslash to the one-member code
# point set it denotes.  The three letter escapes name control characters;
# every other entry stands for the character itself.
SingleCharEsc = dict(
    n=CodePointSet(0x0A),
    r=CodePointSet(0x0D),
    t=CodePointSet(0x09)
)
for c in r'\|.-^?*+{}()[]':
    SingleCharEsc[c] = CodePointSet(ord(c))
686
687
688
# For every entry of PropertyMap register two escapes in catEsc:
# "p{name}" maps to the property's set and "P{name}" to its complement.
#
# NOTE(review): complEsc is created here but nothing in this block fills
# it; the complemented escapes are stored in catEsc alongside the plain
# ones.  Confirm whether that is intended before relying on complEsc.
catEsc = {}
complEsc = {}
for k, v in PropertyMap.iteritems():
    catEsc.update({
        u'p{%s}' % (k,): v,
        u'P{%s}' % (k,): v.negate(),
    })
694
695
# For every entry of BlockMap register "p{Isname}" for the block's set and
# "P{Isname}" for its complement.
IsBlockEsc = {}
for k, v in BlockMap.iteritems():
    IsBlockEsc.update({
        u'p{Is%s}' % (k,): v,
        u'P{Is%s}' % (k,): v.negate(),
    })
700
701
# The wildcard covers every code point except line feed and carriage return.
WildcardEsc = CodePointSet(0x0A, 0x0D).negate()

# Escapes that denote a whole class of characters.  Each lower-case key has
# an upper-case twin holding the complement, except that for the w/W pair
# the upper-case set is built first and 'w' is derived from it.
MultiCharEsc = {}
# Space, tab, line feed, carriage return.
MultiCharEsc['s'] = CodePointSet(0x20, 0x09, 0x0A, 0x0D)
MultiCharEsc['S'] = MultiCharEsc['s'].negate()
# Letters plus '_' and ':'.
MultiCharEsc['i'] = CodePointSet(XML1p0e2.Letter).add(ord('_')).add(ord(':'))
MultiCharEsc['I'] = MultiCharEsc['i'].negate()
# A copy of the NameChar set.
MultiCharEsc['c'] = CodePointSet(XML1p0e2.NameChar)
MultiCharEsc['C'] = MultiCharEsc['c'].negate()
# The 'Nd' property set itself (shared with PropertyMap, not copied).
MultiCharEsc['d'] = PropertyMap['Nd']
MultiCharEsc['D'] = MultiCharEsc['d'].negate()
# Union of the 'P', 'Z' and 'C' property sets, accumulated in a copy of 'P'.
MultiCharEsc['W'] = CodePointSet(PropertyMap['P']).extend(PropertyMap['Z']).extend(PropertyMap['C'])
MultiCharEsc['w'] = MultiCharEsc['W'].negate()
714