xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/re/_compiler.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
#
# Secret Labs' Regular Expression Engine
#
# convert template to internal format
#
# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
#
# See the __init__.py file for information on usage and redistribution.
#
10*cda5da8dSAndroid Build Coastguard Worker
11*cda5da8dSAndroid Build Coastguard Worker"""Internal support module for sre"""
12*cda5da8dSAndroid Build Coastguard Worker
13*cda5da8dSAndroid Build Coastguard Workerimport _sre
14*cda5da8dSAndroid Build Coastguard Workerfrom . import _parser
15*cda5da8dSAndroid Build Coastguard Workerfrom ._constants import *
16*cda5da8dSAndroid Build Coastguard Workerfrom ._casefix import _EXTRA_CASES
17*cda5da8dSAndroid Build Coastguard Worker
18*cda5da8dSAndroid Build Coastguard Workerassert _sre.MAGIC == MAGIC, "SRE module mismatch"
19*cda5da8dSAndroid Build Coastguard Worker
20*cda5da8dSAndroid Build Coastguard Worker_LITERAL_CODES = {LITERAL, NOT_LITERAL}
21*cda5da8dSAndroid Build Coastguard Worker_SUCCESS_CODES = {SUCCESS, FAILURE}
22*cda5da8dSAndroid Build Coastguard Worker_ASSERT_CODES = {ASSERT, ASSERT_NOT}
23*cda5da8dSAndroid Build Coastguard Worker_UNIT_CODES = _LITERAL_CODES | {ANY, IN}
24*cda5da8dSAndroid Build Coastguard Worker
25*cda5da8dSAndroid Build Coastguard Worker_REPEATING_CODES = {
26*cda5da8dSAndroid Build Coastguard Worker    MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE),
27*cda5da8dSAndroid Build Coastguard Worker    MAX_REPEAT: (REPEAT, MAX_UNTIL, REPEAT_ONE),
28*cda5da8dSAndroid Build Coastguard Worker    POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
29*cda5da8dSAndroid Build Coastguard Worker}
30*cda5da8dSAndroid Build Coastguard Worker
31*cda5da8dSAndroid Build Coastguard Workerdef _combine_flags(flags, add_flags, del_flags,
32*cda5da8dSAndroid Build Coastguard Worker                   TYPE_FLAGS=_parser.TYPE_FLAGS):
33*cda5da8dSAndroid Build Coastguard Worker    if add_flags & TYPE_FLAGS:
34*cda5da8dSAndroid Build Coastguard Worker        flags &= ~TYPE_FLAGS
35*cda5da8dSAndroid Build Coastguard Worker    return (flags | add_flags) & ~del_flags
36*cda5da8dSAndroid Build Coastguard Worker
def _compile(code, pattern, flags):
    """Append the opcode sequence for a parsed (sub)pattern to *code*.

    code    -- list of integers; extended and back-patched in place.
    pattern -- iterable of (op, av) pairs.
    flags   -- SRE_FLAG_* bit mask in effect for this (sub)pattern.

    Nested patterns are compiled by recursive calls.  Constructs whose
    length is not known up front first emit a placeholder 0 and, once
    their body has been emitted, overwrite it with an offset computed
    from the placeholder's own index.

    Raises error for a repeat operator under SRE_FLAG_TEMPLATE, for a
    look-behind assertion whose minimum and maximum widths differ, and
    for any operator that none of the branches below handles.
    """
    put = code.append
    ignorecase = flags & SRE_FLAG_IGNORECASE
    locale = flags & SRE_FLAG_LOCALE

    # Case helpers are only set up for IGNORECASE without LOCALE; `fixes`
    # stays None in ASCII mode, which the branches below rely on.
    iscased = tolower = fixes = None
    if ignorecase and not locale:
        if flags & SRE_FLAG_UNICODE:
            iscased = _sre.unicode_iscased
            tolower = _sre.unicode_tolower
            fixes = _EXTRA_CASES
        else:
            iscased = _sre.ascii_iscased
            tolower = _sre.ascii_tolower

    for op, av in pattern:
        if op in _LITERAL_CODES:
            if not ignorecase:
                code.extend((op, av))
            elif locale:
                code.extend((OP_LOCALE_IGNORE[op], av))
            elif not iscased(av):
                # Uncased character: the plain opcode is sufficient.
                code.extend((op, av))
            else:
                folded = tolower(av)
                if not fixes:  # ascii
                    code.extend((OP_IGNORE[op], folded))
                elif folded not in fixes:
                    code.extend((OP_UNICODE_IGNORE[op], folded))
                else:
                    # The character has extra case variants: emit a small
                    # set holding the folded form and all its variants.
                    put(IN_UNI_IGNORE)
                    hole = len(code)
                    put(0)
                    if op is NOT_LITERAL:
                        put(NEGATE)
                    for variant in (folded,) + fixes[folded]:
                        code.extend((LITERAL, variant))
                    put(FAILURE)
                    code[hole] = len(code) - hole

        elif op is IN:
            charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
            if ignorecase and locale:
                put(IN_LOC_IGNORE)
            elif not hascased:
                put(IN)
            elif not fixes:  # ascii
                put(IN_IGNORE)
            else:
                put(IN_UNI_IGNORE)
            hole = len(code)
            put(0)
            _compile_charset(charset, flags, code)
            code[hole] = len(code) - hole

        elif op is ANY:
            put(ANY_ALL if flags & SRE_FLAG_DOTALL else ANY)

        elif op in _REPEATING_CODES:
            if flags & SRE_FLAG_TEMPLATE:
                raise error("internal: unsupported template operator %r" % (op,))
            open_op, close_op, single_op = _REPEATING_CODES[op]
            single = _simple(av[2])
            put(single_op if single else open_op)
            hole = len(code)
            put(0)
            put(av[0])
            put(av[1])
            _compile(code, av[2], flags)
            if single:
                # Single-item form: SUCCESS is counted inside the skip.
                put(SUCCESS)
                code[hole] = len(code) - hole
            else:
                # General form: the closing opcode follows the skipped span.
                code[hole] = len(code) - hole
                put(close_op)

        elif op is SUBPATTERN:
            group, add_flags, del_flags, inner = av
            if group:
                code.extend((MARK, (group - 1) * 2))
            _compile(code, inner, _combine_flags(flags, add_flags, del_flags))
            if group:
                code.extend((MARK, (group - 1) * 2 + 1))

        elif op is ATOMIC_GROUP:
            # Layout: ATOMIC_GROUP, skip, <group pattern>, SUCCESS.  The
            # trailing SUCCESS tells any repeat inside the group to stop
            # consuming and pop its stack when it is reached.
            put(ATOMIC_GROUP)
            hole = len(code)
            put(0)
            _compile(code, av, flags)
            put(SUCCESS)
            code[hole] = len(code) - hole

        elif op in _SUCCESS_CODES:
            put(op)

        elif op in _ASSERT_CODES:
            put(op)
            hole = len(code)
            put(0)
            if av[0] >= 0:
                put(0)  # look ahead
            else:
                width_min, width_max = av[1].getwidth()
                if width_min != width_max:
                    raise error("look-behind requires fixed-width pattern")
                put(width_min)  # look behind
            _compile(code, av[1], flags)
            put(SUCCESS)
            code[hole] = len(code) - hole

        elif op is AT:
            put(op)
            where = av
            if flags & SRE_FLAG_MULTILINE:
                where = AT_MULTILINE.get(where, where)
            if locale:
                where = AT_LOCALE.get(where, where)
            elif flags & SRE_FLAG_UNICODE:
                where = AT_UNICODE.get(where, where)
            put(where)

        elif op is BRANCH:
            put(op)
            jump_holes = []
            for alternative in av[1]:
                hole = len(code)
                put(0)
                _compile(code, alternative, flags)
                put(JUMP)
                jump_holes.append(len(code))
                put(0)
                code[hole] = len(code) - hole
            put(FAILURE)  # end of branch
            # Every alternative's JUMP lands just past the FAILURE above.
            end = len(code)
            for jump_hole in jump_holes:
                code[jump_hole] = end - jump_hole

        elif op is CATEGORY:
            put(op)
            if locale:
                put(CH_LOCALE[av])
            elif flags & SRE_FLAG_UNICODE:
                put(CH_UNICODE[av])
            else:
                put(av)

        elif op is GROUPREF:
            if not ignorecase:
                put(op)
            elif locale:
                put(GROUPREF_LOC_IGNORE)
            elif not fixes:  # ascii
                put(GROUPREF_IGNORE)
            else:
                put(GROUPREF_UNI_IGNORE)
            put(av - 1)

        elif op is GROUPREF_EXISTS:
            put(op)
            put(av[0] - 1)
            yes_hole = len(code)
            put(0)
            _compile(code, av[1], flags)
            if av[2]:
                put(JUMP)
                no_hole = len(code)
                put(0)
                code[yes_hole] = len(code) - yes_hole + 1
                _compile(code, av[2], flags)
                code[no_hole] = len(code) - no_hole
            else:
                code[yes_hole] = len(code) - yes_hole + 1

        else:
            raise error("internal: unsupported operand type %r" % (op,))
213*cda5da8dSAndroid Build Coastguard Worker
214*cda5da8dSAndroid Build Coastguard Workerdef _compile_charset(charset, flags, code):
215*cda5da8dSAndroid Build Coastguard Worker    # compile charset subprogram
216*cda5da8dSAndroid Build Coastguard Worker    emit = code.append
217*cda5da8dSAndroid Build Coastguard Worker    for op, av in charset:
218*cda5da8dSAndroid Build Coastguard Worker        emit(op)
219*cda5da8dSAndroid Build Coastguard Worker        if op is NEGATE:
220*cda5da8dSAndroid Build Coastguard Worker            pass
221*cda5da8dSAndroid Build Coastguard Worker        elif op is LITERAL:
222*cda5da8dSAndroid Build Coastguard Worker            emit(av)
223*cda5da8dSAndroid Build Coastguard Worker        elif op is RANGE or op is RANGE_UNI_IGNORE:
224*cda5da8dSAndroid Build Coastguard Worker            emit(av[0])
225*cda5da8dSAndroid Build Coastguard Worker            emit(av[1])
226*cda5da8dSAndroid Build Coastguard Worker        elif op is CHARSET:
227*cda5da8dSAndroid Build Coastguard Worker            code.extend(av)
228*cda5da8dSAndroid Build Coastguard Worker        elif op is BIGCHARSET:
229*cda5da8dSAndroid Build Coastguard Worker            code.extend(av)
230*cda5da8dSAndroid Build Coastguard Worker        elif op is CATEGORY:
231*cda5da8dSAndroid Build Coastguard Worker            if flags & SRE_FLAG_LOCALE:
232*cda5da8dSAndroid Build Coastguard Worker                emit(CH_LOCALE[av])
233*cda5da8dSAndroid Build Coastguard Worker            elif flags & SRE_FLAG_UNICODE:
234*cda5da8dSAndroid Build Coastguard Worker                emit(CH_UNICODE[av])
235*cda5da8dSAndroid Build Coastguard Worker            else:
236*cda5da8dSAndroid Build Coastguard Worker                emit(av)
237*cda5da8dSAndroid Build Coastguard Worker        else:
238*cda5da8dSAndroid Build Coastguard Worker            raise error("internal: unsupported set operator %r" % (op,))
239*cda5da8dSAndroid Build Coastguard Worker    emit(FAILURE)
240*cda5da8dSAndroid Build Coastguard Worker
241*cda5da8dSAndroid Build Coastguard Workerdef _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
242*cda5da8dSAndroid Build Coastguard Worker    # internal: optimize character set
243*cda5da8dSAndroid Build Coastguard Worker    out = []
244*cda5da8dSAndroid Build Coastguard Worker    tail = []
245*cda5da8dSAndroid Build Coastguard Worker    charmap = bytearray(256)
246*cda5da8dSAndroid Build Coastguard Worker    hascased = False
247*cda5da8dSAndroid Build Coastguard Worker    for op, av in charset:
248*cda5da8dSAndroid Build Coastguard Worker        while True:
249*cda5da8dSAndroid Build Coastguard Worker            try:
250*cda5da8dSAndroid Build Coastguard Worker                if op is LITERAL:
251*cda5da8dSAndroid Build Coastguard Worker                    if fixup:
252*cda5da8dSAndroid Build Coastguard Worker                        lo = fixup(av)
253*cda5da8dSAndroid Build Coastguard Worker                        charmap[lo] = 1
254*cda5da8dSAndroid Build Coastguard Worker                        if fixes and lo in fixes:
255*cda5da8dSAndroid Build Coastguard Worker                            for k in fixes[lo]:
256*cda5da8dSAndroid Build Coastguard Worker                                charmap[k] = 1
257*cda5da8dSAndroid Build Coastguard Worker                        if not hascased and iscased(av):
258*cda5da8dSAndroid Build Coastguard Worker                            hascased = True
259*cda5da8dSAndroid Build Coastguard Worker                    else:
260*cda5da8dSAndroid Build Coastguard Worker                        charmap[av] = 1
261*cda5da8dSAndroid Build Coastguard Worker                elif op is RANGE:
262*cda5da8dSAndroid Build Coastguard Worker                    r = range(av[0], av[1]+1)
263*cda5da8dSAndroid Build Coastguard Worker                    if fixup:
264*cda5da8dSAndroid Build Coastguard Worker                        if fixes:
265*cda5da8dSAndroid Build Coastguard Worker                            for i in map(fixup, r):
266*cda5da8dSAndroid Build Coastguard Worker                                charmap[i] = 1
267*cda5da8dSAndroid Build Coastguard Worker                                if i in fixes:
268*cda5da8dSAndroid Build Coastguard Worker                                    for k in fixes[i]:
269*cda5da8dSAndroid Build Coastguard Worker                                        charmap[k] = 1
270*cda5da8dSAndroid Build Coastguard Worker                        else:
271*cda5da8dSAndroid Build Coastguard Worker                            for i in map(fixup, r):
272*cda5da8dSAndroid Build Coastguard Worker                                charmap[i] = 1
273*cda5da8dSAndroid Build Coastguard Worker                        if not hascased:
274*cda5da8dSAndroid Build Coastguard Worker                            hascased = any(map(iscased, r))
275*cda5da8dSAndroid Build Coastguard Worker                    else:
276*cda5da8dSAndroid Build Coastguard Worker                        for i in r:
277*cda5da8dSAndroid Build Coastguard Worker                            charmap[i] = 1
278*cda5da8dSAndroid Build Coastguard Worker                elif op is NEGATE:
279*cda5da8dSAndroid Build Coastguard Worker                    out.append((op, av))
280*cda5da8dSAndroid Build Coastguard Worker                else:
281*cda5da8dSAndroid Build Coastguard Worker                    tail.append((op, av))
282*cda5da8dSAndroid Build Coastguard Worker            except IndexError:
283*cda5da8dSAndroid Build Coastguard Worker                if len(charmap) == 256:
284*cda5da8dSAndroid Build Coastguard Worker                    # character set contains non-UCS1 character codes
285*cda5da8dSAndroid Build Coastguard Worker                    charmap += b'\0' * 0xff00
286*cda5da8dSAndroid Build Coastguard Worker                    continue
287*cda5da8dSAndroid Build Coastguard Worker                # Character set contains non-BMP character codes.
288*cda5da8dSAndroid Build Coastguard Worker                # For range, all BMP characters in the range are already
289*cda5da8dSAndroid Build Coastguard Worker                # proceeded.
290*cda5da8dSAndroid Build Coastguard Worker                if fixup:
291*cda5da8dSAndroid Build Coastguard Worker                    hascased = True
292*cda5da8dSAndroid Build Coastguard Worker                    # For now, IN_UNI_IGNORE+LITERAL and
293*cda5da8dSAndroid Build Coastguard Worker                    # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
294*cda5da8dSAndroid Build Coastguard Worker                    # characters, because two characters (at least one of
295*cda5da8dSAndroid Build Coastguard Worker                    # which is not in the BMP) match case-insensitively
296*cda5da8dSAndroid Build Coastguard Worker                    # if and only if:
297*cda5da8dSAndroid Build Coastguard Worker                    # 1) c1.lower() == c2.lower()
298*cda5da8dSAndroid Build Coastguard Worker                    # 2) c1.lower() == c2 or c1.lower().upper() == c2
299*cda5da8dSAndroid Build Coastguard Worker                    # Also, both c.lower() and c.lower().upper() are single
300*cda5da8dSAndroid Build Coastguard Worker                    # characters for every non-BMP character.
301*cda5da8dSAndroid Build Coastguard Worker                    if op is RANGE:
302*cda5da8dSAndroid Build Coastguard Worker                        op = RANGE_UNI_IGNORE
303*cda5da8dSAndroid Build Coastguard Worker                tail.append((op, av))
304*cda5da8dSAndroid Build Coastguard Worker            break
305*cda5da8dSAndroid Build Coastguard Worker
306*cda5da8dSAndroid Build Coastguard Worker    # compress character map
307*cda5da8dSAndroid Build Coastguard Worker    runs = []
308*cda5da8dSAndroid Build Coastguard Worker    q = 0
309*cda5da8dSAndroid Build Coastguard Worker    while True:
310*cda5da8dSAndroid Build Coastguard Worker        p = charmap.find(1, q)
311*cda5da8dSAndroid Build Coastguard Worker        if p < 0:
312*cda5da8dSAndroid Build Coastguard Worker            break
313*cda5da8dSAndroid Build Coastguard Worker        if len(runs) >= 2:
314*cda5da8dSAndroid Build Coastguard Worker            runs = None
315*cda5da8dSAndroid Build Coastguard Worker            break
316*cda5da8dSAndroid Build Coastguard Worker        q = charmap.find(0, p)
317*cda5da8dSAndroid Build Coastguard Worker        if q < 0:
318*cda5da8dSAndroid Build Coastguard Worker            runs.append((p, len(charmap)))
319*cda5da8dSAndroid Build Coastguard Worker            break
320*cda5da8dSAndroid Build Coastguard Worker        runs.append((p, q))
321*cda5da8dSAndroid Build Coastguard Worker    if runs is not None:
322*cda5da8dSAndroid Build Coastguard Worker        # use literal/range
323*cda5da8dSAndroid Build Coastguard Worker        for p, q in runs:
324*cda5da8dSAndroid Build Coastguard Worker            if q - p == 1:
325*cda5da8dSAndroid Build Coastguard Worker                out.append((LITERAL, p))
326*cda5da8dSAndroid Build Coastguard Worker            else:
327*cda5da8dSAndroid Build Coastguard Worker                out.append((RANGE, (p, q - 1)))
328*cda5da8dSAndroid Build Coastguard Worker        out += tail
329*cda5da8dSAndroid Build Coastguard Worker        # if the case was changed or new representation is more compact
330*cda5da8dSAndroid Build Coastguard Worker        if hascased or len(out) < len(charset):
331*cda5da8dSAndroid Build Coastguard Worker            return out, hascased
332*cda5da8dSAndroid Build Coastguard Worker        # else original character set is good enough
333*cda5da8dSAndroid Build Coastguard Worker        return charset, hascased
334*cda5da8dSAndroid Build Coastguard Worker
335*cda5da8dSAndroid Build Coastguard Worker    # use bitmap
336*cda5da8dSAndroid Build Coastguard Worker    if len(charmap) == 256:
337*cda5da8dSAndroid Build Coastguard Worker        data = _mk_bitmap(charmap)
338*cda5da8dSAndroid Build Coastguard Worker        out.append((CHARSET, data))
339*cda5da8dSAndroid Build Coastguard Worker        out += tail
340*cda5da8dSAndroid Build Coastguard Worker        return out, hascased
341*cda5da8dSAndroid Build Coastguard Worker
342*cda5da8dSAndroid Build Coastguard Worker    # To represent a big charset, first a bitmap of all characters in the
343*cda5da8dSAndroid Build Coastguard Worker    # set is constructed. Then, this bitmap is sliced into chunks of 256
344*cda5da8dSAndroid Build Coastguard Worker    # characters, duplicate chunks are eliminated, and each chunk is
345*cda5da8dSAndroid Build Coastguard Worker    # given a number. In the compiled expression, the charset is
346*cda5da8dSAndroid Build Coastguard Worker    # represented by a 32-bit word sequence, consisting of one word for
347*cda5da8dSAndroid Build Coastguard Worker    # the number of different chunks, a sequence of 256 bytes (64 words)
348*cda5da8dSAndroid Build Coastguard Worker    # of chunk numbers indexed by their original chunk position, and a
349*cda5da8dSAndroid Build Coastguard Worker    # sequence of 256-bit chunks (8 words each).
350*cda5da8dSAndroid Build Coastguard Worker
351*cda5da8dSAndroid Build Coastguard Worker    # Compression is normally good: in a typical charset, large ranges of
352*cda5da8dSAndroid Build Coastguard Worker    # Unicode will be either completely excluded (e.g. if only cyrillic
353*cda5da8dSAndroid Build Coastguard Worker    # letters are to be matched), or completely included (e.g. if large
354*cda5da8dSAndroid Build Coastguard Worker    # subranges of Kanji match). These ranges will be represented by
355*cda5da8dSAndroid Build Coastguard Worker    # chunks of all one-bits or all zero-bits.
356*cda5da8dSAndroid Build Coastguard Worker
357*cda5da8dSAndroid Build Coastguard Worker    # Matching can be also done efficiently: the more significant byte of
358*cda5da8dSAndroid Build Coastguard Worker    # the Unicode character is an index into the chunk number, and the
359*cda5da8dSAndroid Build Coastguard Worker    # less significant byte is a bit index in the chunk (just like the
360*cda5da8dSAndroid Build Coastguard Worker    # CHARSET matching).
361*cda5da8dSAndroid Build Coastguard Worker
362*cda5da8dSAndroid Build Coastguard Worker    charmap = bytes(charmap) # should be hashable
363*cda5da8dSAndroid Build Coastguard Worker    comps = {}
364*cda5da8dSAndroid Build Coastguard Worker    mapping = bytearray(256)
365*cda5da8dSAndroid Build Coastguard Worker    block = 0
366*cda5da8dSAndroid Build Coastguard Worker    data = bytearray()
367*cda5da8dSAndroid Build Coastguard Worker    for i in range(0, 65536, 256):
368*cda5da8dSAndroid Build Coastguard Worker        chunk = charmap[i: i + 256]
369*cda5da8dSAndroid Build Coastguard Worker        if chunk in comps:
370*cda5da8dSAndroid Build Coastguard Worker            mapping[i // 256] = comps[chunk]
371*cda5da8dSAndroid Build Coastguard Worker        else:
372*cda5da8dSAndroid Build Coastguard Worker            mapping[i // 256] = comps[chunk] = block
373*cda5da8dSAndroid Build Coastguard Worker            block += 1
374*cda5da8dSAndroid Build Coastguard Worker            data += chunk
375*cda5da8dSAndroid Build Coastguard Worker    data = _mk_bitmap(data)
376*cda5da8dSAndroid Build Coastguard Worker    data[0:0] = [block] + _bytes_to_codes(mapping)
377*cda5da8dSAndroid Build Coastguard Worker    out.append((BIGCHARSET, data))
378*cda5da8dSAndroid Build Coastguard Worker    out += tail
379*cda5da8dSAndroid Build Coastguard Worker    return out, hascased
380*cda5da8dSAndroid Build Coastguard Worker
# Width of one code word in bits, and the largest value it can hold.
_CODEBITS = _sre.CODESIZE * 8
MAXCODE = (1 << _CODEBITS) - 1
# Translation table: byte 0 -> ASCII "0", every other byte -> ASCII "1".
_BITS_TRANS = b'0' + b'1' * 255


def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
    """Pack a bytes-like map of zero/non-zero entries into code words.

    Entry 0 of *bits* becomes the least significant bit of the first word;
    each word covers _CODEBITS consecutive entries.  Returns a list of
    integers.
    """
    # Reversing the digit string lets each slice be parsed directly as a
    # base-2 number with the lowest entry as its last digit.
    reversed_digits = bits.translate(_BITS_TRANS)[::-1]
    ends = range(len(reversed_digits), 0, -_CODEBITS)
    return [_int(reversed_digits[end - _CODEBITS:end], 2) for end in ends]
388*cda5da8dSAndroid Build Coastguard Worker
def _bytes_to_codes(b):
    """Return the bytes-like *b* reinterpreted as a list of code words.

    The buffer is viewed as native unsigned ints ('I'); the assertions
    check that this item size equals _sre.CODESIZE and that the view
    covers all of *b*.
    """
    words = memoryview(b).cast('I')
    width = words.itemsize
    assert width == _sre.CODESIZE
    assert len(words) * width == len(b)
    return words.tolist()
395*cda5da8dSAndroid Build Coastguard Worker
def _simple(p):
    """Return True if subpattern *p* is a single "simple" operator.

    A subpattern qualifies when it holds exactly one item that is a unit
    code, possibly wrapped in any number of SUBPATTERN items whose first
    argument (the group) is None.
    """
    while True:
        if len(p) != 1:
            return False
        op, av = p[0]
        if op is not SUBPATTERN:
            return op in _UNIT_CODES
        if av[0] is not None:
            return False
        # Unwrap the subpattern and examine its body.
        p = av[-1]
404*cda5da8dSAndroid Build Coastguard Worker
def _generate_overlap_table(prefix):
    """
    Build the self-overlap table for *prefix*.

    The result has one entry per element of the prefix.  Entry i holds the
    length k of the longest proper overlap ending at i:
    - 0 means prefix[i:] cannot overlap prefix[0:...]
    - k with 0 < k <= i means prefix[i-k+1:i+1] equals prefix[0:k]
    """
    size = len(prefix)
    table = [0] * size
    for pos in range(1, size):
        current = prefix[pos]
        k = table[pos - 1]
        # Fall back through shorter overlaps until one can be extended
        # or none are left.
        while k and current != prefix[k]:
            k = table[k - 1]
        if current == prefix[k]:
            k += 1
        table[pos] = k
    return table
425*cda5da8dSAndroid Build Coastguard Worker
def _get_iscased(flags):
    """Pick the "is this character cased" predicate matching *flags*.

    Returns None when SRE_FLAG_IGNORECASE is not set; otherwise the
    Unicode predicate if SRE_FLAG_UNICODE is set, else the ASCII one.
    """
    if flags & SRE_FLAG_IGNORECASE:
        if flags & SRE_FLAG_UNICODE:
            return _sre.unicode_iscased
        return _sre.ascii_iscased
    return None
433*cda5da8dSAndroid Build Coastguard Worker
def _get_literal_prefix(pattern, flags):
    """Scan the leading items of *pattern* for a run of literal characters.

    Returns a tuple ``(prefix, prefix_skip, got_all)``:
    - prefix: list of the literal values collected, descending into
      SUBPATTERN items;
    - prefix_skip: number of prefix characters collected before the first
      subpattern that either has a non-None group or itself reported a
      skip (offset accordingly), or None if there was no such subpattern;
    - got_all: True only if every item of the pattern was consumed.
    """
    literals = []
    skip = None
    iscased = _get_iscased(flags)
    for op, av in pattern.data:
        if op is LITERAL:
            # A cased character under IGNORECASE cannot be part of the prefix.
            if iscased and iscased(av):
                return literals, skip, False
            literals.append(av)
            continue
        if op is not SUBPATTERN:
            return literals, skip, False

        group, add_flags, del_flags, inner = av
        inner_flags = _combine_flags(flags, add_flags, del_flags)
        if inner_flags & SRE_FLAG_IGNORECASE and inner_flags & SRE_FLAG_LOCALE:
            return literals, skip, False
        inner_literals, inner_skip, inner_complete = \
            _get_literal_prefix(inner, inner_flags)
        # Only the first skip position found is remembered.
        if skip is None:
            if group is not None:
                skip = len(literals)
            elif inner_skip is not None:
                skip = len(literals) + inner_skip
        literals.extend(inner_literals)
        if not inner_complete:
            return literals, skip, False
    return literals, skip, True
464*cda5da8dSAndroid Build Coastguard Worker
def _get_charset_prefix(pattern, flags):
    """Derive a set of possible first characters from the head of *pattern*.

    Leading SUBPATTERN items are unwrapped (merging their flags) and the
    first remaining item is examined.  Returns a list of ``(op, av)`` set
    items when that item is a LITERAL, a BRANCH whose alternatives all
    start with a LITERAL, or an IN set; returns None when no usable set
    can be derived.
    """
    if not pattern.data:
        return None
    op, av = pattern.data[0]
    while op is SUBPATTERN:
        _group, add_flags, del_flags, pattern = av
        flags = _combine_flags(flags, add_flags, del_flags)
        if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
            return None
        if not pattern.data:
            return None
        op, av = pattern.data[0]

    iscased = _get_iscased(flags)

    if op is LITERAL:
        if iscased and iscased(av):
            return None
        return [(op, av)]

    if op is BRANCH:
        firsts = []
        for alternative in av[1]:
            # An empty alternative has no first character at all.
            if not alternative:
                return None
            head_op, head_av = alternative[0]
            if head_op is not LITERAL or (iscased and iscased(head_av)):
                return None
            firsts.append((head_op, head_av))
        return firsts

    if op is IN:
        if iscased:
            for item_op, item_av in av:
                if item_op is LITERAL:
                    if iscased(item_av):
                        return None
                elif item_op is RANGE:
                    low, high = item_av[0], item_av[1]
                    # Ranges reaching past 0xffff are rejected without
                    # being scanned.
                    if high > 0xffff:
                        return None
                    if any(map(iscased, range(low, high + 1))):
                        return None
        return av

    return None
508*cda5da8dSAndroid Build Coastguard Worker
def _compile_info(code, pattern, flags):
    """Append an INFO block for *pattern* to the code list *code*.

    The block holds a skip word, a flag mask and the minimum and maximum
    pattern width (each capped at MAXCODE).  When one can be found it also
    holds either a literal prefix followed by its overlap table, or a
    compiled character set.
    """
    lo, hi = pattern.getwidth()
    hi = min(hi, MAXCODE)
    if lo == 0:
        # Zero minimum width: emit a bare fixed-size header and stop.
        code.extend([INFO, 4, 0, lo, hi])
        return

    prefix = []
    prefix_skip = 0
    charset = []
    # The prefix/charset search is skipped for IGNORECASE together with LOCALE.
    locale_ignorecase = flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE
    if not locale_ignorecase:
        prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
        if not prefix:
            charset = _get_charset_prefix(pattern, flags)

    # Flag mask describing what follows the widths.
    if prefix:
        mask = SRE_INFO_PREFIX
        if prefix_skip is None and got_all:
            mask |= SRE_INFO_LITERAL
    elif charset:
        mask = SRE_INFO_CHARSET
    else:
        mask = 0

    # Minimum width, capped; a capped pattern also gets a capped prefix.
    if lo < MAXCODE:
        min_width = lo
    else:
        min_width = MAXCODE
        prefix = prefix[:MAXCODE]

    code.append(INFO)
    skip = len(code)
    # The leading 0 is a placeholder for the skip word, patched at the end.
    code.extend([0, mask, min_width, hi])

    if prefix:
        if prefix_skip is None:
            prefix_skip = len(prefix)
        code.extend([len(prefix), prefix_skip])
        code.extend(prefix)
        code.extend(_generate_overlap_table(prefix))
    elif charset:
        optimized, hascased = _optimize_charset(charset)
        assert not hascased
        _compile_charset(optimized, flags, code)

    code[skip] = len(code) - skip
567*cda5da8dSAndroid Build Coastguard Worker
def isstring(obj):
    """Return True if *obj* is a str or bytes instance, else False."""
    return isinstance(obj, str) or isinstance(obj, bytes)
570*cda5da8dSAndroid Build Coastguard Worker
def _code(p, flags):
    """Compile parsed pattern *p* into a flat list of code words.

    The list consists of the info block, the compiled pattern body and a
    final SUCCESS opcode.  *flags* is combined with the flags stored on
    the pattern's parser state.
    """
    merged_flags = p.state.flags | flags
    program = []
    _compile_info(program, p, merged_flags)
    _compile(program, p.data, merged_flags)
    program.append(SUCCESS)
    return program
585*cda5da8dSAndroid Build Coastguard Worker
def _hex_code(code):
    """Format the code words in *code* as a bracketed list of hex literals.

    Every word is zero-padded to the full width of one code word
    (two hex digits per byte, plus the '0x' marker).
    """
    width = _sre.CODESIZE * 2 + 2
    rendered = ['%#0*x' % (width, word) for word in code]
    return '[%s]' % ', '.join(rendered)
588*cda5da8dSAndroid Build Coastguard Worker
def dis(code):
    """Print a human-readable disassembly of a compiled pattern.

    *code* is the flat list of integer code words (compile() passes the
    result of _code() here when SRE_FLAG_DEBUG is set).  One line is
    printed per instruction, prefixed with its offset and indented by
    nesting depth.  Raises ValueError for an opcode that has no handler
    in this function.
    """
    import sys

    # Offsets that some already-printed instruction refers to via "(to N)".
    labels = set()
    # Current nesting depth; drives the indentation of printed lines.
    level = 0
    # Number of digits needed to print the largest offset.
    offset_width = len(str(len(code) - 1))

    def dis_(start, end):
        # Disassemble the half-open range code[start:end] one level deeper.
        def print_(*args, to=None):
            # Print one instruction line for the instruction at `start`.
            # If `to` is given it is recorded as a jump target and shown
            # as a trailing "(to N)".
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            # ':' after the offset marks a known jump target, '.' otherwise.
            print('%*d%s ' % (offset_width, start, ':' if start in labels else '.'),
                  end='  '*(level-1))
            print(*args)

        def print_2(*args):
            # Print a continuation line: no offset, just indentation.
            print(end=' '*(offset_width + 2*level))
            print(*args)

        nonlocal level
        level += 1
        i = start
        while i < end:
            # `start` is rebound to the current instruction's offset so that
            # print_ (which reads it from this scope) labels the right line.
            start = i
            op = code[i]
            i += 1
            # Translate the raw code word through the OPCODES table.
            op = OPCODES[op]
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                # Opcodes with no argument words.
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_UNI_IGNORE, NOT_LITERAL_UNI_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                # One argument: a character code, shown in hex and as a char.
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                # One argument: an index into ATCODES; the 'AT_' name
                # prefix is stripped for display.
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])
            elif op is CATEGORY:
                # One argument: an index into CHCODES; the 'CATEGORY_'
                # name prefix is stripped for display.
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])
            elif op in (IN, IN_IGNORE, IN_UNI_IGNORE, IN_LOC_IGNORE):
                # The skip word is relative to its own position; the set
                # body in between is disassembled as a nested range.
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op in (RANGE, RANGE_UNI_IGNORE):
                # Two arguments: low and high character codes.
                lo, hi = code[i: i+2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                # A bitmap occupying 256//_CODEBITS code words.
                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
                i += 256//_CODEBITS
            elif op is BIGCHARSET:
                # First word: number of bitmap chunks that follow the mapping.
                arg = code[i]
                i += 1
                # Next 256//CODESIZE words are unpacked into 256 byte values
                # using the native byte order.
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + 256//_sre.CODESIZE]))
                print_(op, arg, mapping)
                i += 256//_sre.CODESIZE
                level += 1
                # Then `arg` bitmaps of 256//_CODEBITS words each.
                for j in range(arg):
                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
                    i += 256//_CODEBITS
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE, GROUPREF_UNI_IGNORE,
                        GROUPREF_LOC_IGNORE):
                # One plain integer argument.
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                # One argument: a skip relative to the skip word itself.
                skip = code[i]
                print_(op, skip, to=i+skip)
                i += 1
            elif op is BRANCH:
                # A chain of alternatives, each introduced by a skip word;
                # a zero skip word ends the chain and is shown as FAILURE.
                skip = code[i]
                print_(op, skip, to=i+skip)
                while skip:
                    dis_(i+1, i+skip)
                    i += skip
                    start = i
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i+skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
                        POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
                # Three arguments (skip, min, max) followed by the repeated
                # body, which is disassembled as a nested range.
                skip, min, max = code[i: i+3]
                if max == MAXREPEAT:
                    max = 'MAXREPEAT'
                print_(op, skip, min, max, to=i+skip)
                dis_(i+3, i+skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                # Two arguments (group, skip); what follows is not nested
                # but handled by later iterations of this loop.
                arg, skip = code[i: i+2]
                print_(op, arg, skip, to=i+skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                # Two arguments (skip, arg) followed by the asserted body.
                skip, arg = code[i: i+2]
                print_(op, skip, arg, to=i+skip)
                dis_(i+2, i+skip)
                i += skip
            elif op is ATOMIC_GROUP:
                # One skip argument followed by the group body.
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op is INFO:
                # Header: skip, flag mask, min width, max width.
                skip, flags, min, max = code[i: i+4]
                if max == MAXREPEAT:
                    max = 'MAXREPEAT'
                print_(op, skip, bin(flags), min, max, to=i+skip)
                start = i+4
                if flags & SRE_INFO_PREFIX:
                    # Prefix section: length, skip, the prefix characters,
                    # then an overlap table of the same length.
                    prefix_len, prefix_skip = code[i+4: i+6]
                    print_2('  prefix_skip', prefix_skip)
                    start = i + 6
                    prefix = code[start: start+prefix_len]
                    print_2('  prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    start += prefix_len
                    print_2('  overlap', code[start: start+prefix_len])
                    start += prefix_len
                if flags & SRE_INFO_CHARSET:
                    # Charset section: the rest of the block is set code.
                    level += 1
                    print_2('in')
                    dis_(start, i+skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))
736*cda5da8dSAndroid Build Coastguard Worker
737*cda5da8dSAndroid Build Coastguard Worker
def compile(p, flags=0):
    """Convert a pattern into a compiled pattern object.

    *p* is either a pattern string (str or bytes), which is parsed first,
    or an already parsed pattern.  When SRE_FLAG_DEBUG is set in *flags*
    the generated code is also disassembled to standard output.
    """
    pattern = None
    if isstring(p):
        # Keep the source text; it is handed to _sre.compile() below.
        pattern = p
        p = _parser.parse(p, flags)

    code = _code(p, flags)

    if flags & SRE_FLAG_DEBUG:
        print()
        dis(code)

    state = p.state
    # Group lookup in both directions: name -> index and index -> name.
    groupindex = state.groupdict
    indexgroup = [None] * state.groups
    for name, index in groupindex.items():
        indexgroup[index] = name

    return _sre.compile(
        pattern,
        flags | state.flags,
        code,
        state.groups - 1,
        groupindex,
        tuple(indexgroup),
    )
764