#
# Secret Labs' Regular Expression Engine
#
# convert template to internal format
#
# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
#
# See the __init__.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

import _sre
from . import _parser
from ._constants import *
from ._casefix import _EXTRA_CASES

# The opcode numbering used here must match the one the C engine was built with.
assert _sre.MAGIC == MAGIC, "SRE module mismatch"

# Opcode groups used by the compiler to dispatch on the parsed operator.
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
_SUCCESS_CODES = {SUCCESS, FAILURE}
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}

# For each parsed repeat operator: (opcode opening a general repeat,
# opcode closing a general repeat, opcode for a repeat of a "simple" item).
_REPEATING_CODES = {
    MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE),
    MAX_REPEAT: (REPEAT, MAX_UNTIL, REPEAT_ONE),
    POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
}
30*cda5da8dSAndroid Build Coastguard Worker 31*cda5da8dSAndroid Build Coastguard Workerdef _combine_flags(flags, add_flags, del_flags, 32*cda5da8dSAndroid Build Coastguard Worker TYPE_FLAGS=_parser.TYPE_FLAGS): 33*cda5da8dSAndroid Build Coastguard Worker if add_flags & TYPE_FLAGS: 34*cda5da8dSAndroid Build Coastguard Worker flags &= ~TYPE_FLAGS 35*cda5da8dSAndroid Build Coastguard Worker return (flags | add_flags) & ~del_flags 36*cda5da8dSAndroid Build Coastguard Worker 37*cda5da8dSAndroid Build Coastguard Workerdef _compile(code, pattern, flags): 38*cda5da8dSAndroid Build Coastguard Worker # internal: compile a (sub)pattern 39*cda5da8dSAndroid Build Coastguard Worker emit = code.append 40*cda5da8dSAndroid Build Coastguard Worker _len = len 41*cda5da8dSAndroid Build Coastguard Worker LITERAL_CODES = _LITERAL_CODES 42*cda5da8dSAndroid Build Coastguard Worker REPEATING_CODES = _REPEATING_CODES 43*cda5da8dSAndroid Build Coastguard Worker SUCCESS_CODES = _SUCCESS_CODES 44*cda5da8dSAndroid Build Coastguard Worker ASSERT_CODES = _ASSERT_CODES 45*cda5da8dSAndroid Build Coastguard Worker iscased = None 46*cda5da8dSAndroid Build Coastguard Worker tolower = None 47*cda5da8dSAndroid Build Coastguard Worker fixes = None 48*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE: 49*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_UNICODE: 50*cda5da8dSAndroid Build Coastguard Worker iscased = _sre.unicode_iscased 51*cda5da8dSAndroid Build Coastguard Worker tolower = _sre.unicode_tolower 52*cda5da8dSAndroid Build Coastguard Worker fixes = _EXTRA_CASES 53*cda5da8dSAndroid Build Coastguard Worker else: 54*cda5da8dSAndroid Build Coastguard Worker iscased = _sre.ascii_iscased 55*cda5da8dSAndroid Build Coastguard Worker tolower = _sre.ascii_tolower 56*cda5da8dSAndroid Build Coastguard Worker for op, av in pattern: 57*cda5da8dSAndroid Build Coastguard Worker if op in LITERAL_CODES: 58*cda5da8dSAndroid Build Coastguard Worker 
if not flags & SRE_FLAG_IGNORECASE: 59*cda5da8dSAndroid Build Coastguard Worker emit(op) 60*cda5da8dSAndroid Build Coastguard Worker emit(av) 61*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_LOCALE: 62*cda5da8dSAndroid Build Coastguard Worker emit(OP_LOCALE_IGNORE[op]) 63*cda5da8dSAndroid Build Coastguard Worker emit(av) 64*cda5da8dSAndroid Build Coastguard Worker elif not iscased(av): 65*cda5da8dSAndroid Build Coastguard Worker emit(op) 66*cda5da8dSAndroid Build Coastguard Worker emit(av) 67*cda5da8dSAndroid Build Coastguard Worker else: 68*cda5da8dSAndroid Build Coastguard Worker lo = tolower(av) 69*cda5da8dSAndroid Build Coastguard Worker if not fixes: # ascii 70*cda5da8dSAndroid Build Coastguard Worker emit(OP_IGNORE[op]) 71*cda5da8dSAndroid Build Coastguard Worker emit(lo) 72*cda5da8dSAndroid Build Coastguard Worker elif lo not in fixes: 73*cda5da8dSAndroid Build Coastguard Worker emit(OP_UNICODE_IGNORE[op]) 74*cda5da8dSAndroid Build Coastguard Worker emit(lo) 75*cda5da8dSAndroid Build Coastguard Worker else: 76*cda5da8dSAndroid Build Coastguard Worker emit(IN_UNI_IGNORE) 77*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 78*cda5da8dSAndroid Build Coastguard Worker if op is NOT_LITERAL: 79*cda5da8dSAndroid Build Coastguard Worker emit(NEGATE) 80*cda5da8dSAndroid Build Coastguard Worker for k in (lo,) + fixes[lo]: 81*cda5da8dSAndroid Build Coastguard Worker emit(LITERAL) 82*cda5da8dSAndroid Build Coastguard Worker emit(k) 83*cda5da8dSAndroid Build Coastguard Worker emit(FAILURE) 84*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 85*cda5da8dSAndroid Build Coastguard Worker elif op is IN: 86*cda5da8dSAndroid Build Coastguard Worker charset, hascased = _optimize_charset(av, iscased, tolower, fixes) 87*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: 88*cda5da8dSAndroid Build Coastguard Worker emit(IN_LOC_IGNORE) 89*cda5da8dSAndroid Build Coastguard 
Worker elif not hascased: 90*cda5da8dSAndroid Build Coastguard Worker emit(IN) 91*cda5da8dSAndroid Build Coastguard Worker elif not fixes: # ascii 92*cda5da8dSAndroid Build Coastguard Worker emit(IN_IGNORE) 93*cda5da8dSAndroid Build Coastguard Worker else: 94*cda5da8dSAndroid Build Coastguard Worker emit(IN_UNI_IGNORE) 95*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 96*cda5da8dSAndroid Build Coastguard Worker _compile_charset(charset, flags, code) 97*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 98*cda5da8dSAndroid Build Coastguard Worker elif op is ANY: 99*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_DOTALL: 100*cda5da8dSAndroid Build Coastguard Worker emit(ANY_ALL) 101*cda5da8dSAndroid Build Coastguard Worker else: 102*cda5da8dSAndroid Build Coastguard Worker emit(ANY) 103*cda5da8dSAndroid Build Coastguard Worker elif op in REPEATING_CODES: 104*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_TEMPLATE: 105*cda5da8dSAndroid Build Coastguard Worker raise error("internal: unsupported template operator %r" % (op,)) 106*cda5da8dSAndroid Build Coastguard Worker if _simple(av[2]): 107*cda5da8dSAndroid Build Coastguard Worker emit(REPEATING_CODES[op][2]) 108*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 109*cda5da8dSAndroid Build Coastguard Worker emit(av[0]) 110*cda5da8dSAndroid Build Coastguard Worker emit(av[1]) 111*cda5da8dSAndroid Build Coastguard Worker _compile(code, av[2], flags) 112*cda5da8dSAndroid Build Coastguard Worker emit(SUCCESS) 113*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 114*cda5da8dSAndroid Build Coastguard Worker else: 115*cda5da8dSAndroid Build Coastguard Worker emit(REPEATING_CODES[op][0]) 116*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 117*cda5da8dSAndroid Build Coastguard Worker emit(av[0]) 118*cda5da8dSAndroid Build Coastguard Worker emit(av[1]) 119*cda5da8dSAndroid Build Coastguard Worker 
_compile(code, av[2], flags) 120*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 121*cda5da8dSAndroid Build Coastguard Worker emit(REPEATING_CODES[op][1]) 122*cda5da8dSAndroid Build Coastguard Worker elif op is SUBPATTERN: 123*cda5da8dSAndroid Build Coastguard Worker group, add_flags, del_flags, p = av 124*cda5da8dSAndroid Build Coastguard Worker if group: 125*cda5da8dSAndroid Build Coastguard Worker emit(MARK) 126*cda5da8dSAndroid Build Coastguard Worker emit((group-1)*2) 127*cda5da8dSAndroid Build Coastguard Worker # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags)) 128*cda5da8dSAndroid Build Coastguard Worker _compile(code, p, _combine_flags(flags, add_flags, del_flags)) 129*cda5da8dSAndroid Build Coastguard Worker if group: 130*cda5da8dSAndroid Build Coastguard Worker emit(MARK) 131*cda5da8dSAndroid Build Coastguard Worker emit((group-1)*2+1) 132*cda5da8dSAndroid Build Coastguard Worker elif op is ATOMIC_GROUP: 133*cda5da8dSAndroid Build Coastguard Worker # Atomic Groups are handled by starting with an Atomic 134*cda5da8dSAndroid Build Coastguard Worker # Group op code, then putting in the atomic group pattern 135*cda5da8dSAndroid Build Coastguard Worker # and finally a success op code to tell any repeat 136*cda5da8dSAndroid Build Coastguard Worker # operations within the Atomic Group to stop eating and 137*cda5da8dSAndroid Build Coastguard Worker # pop their stack if they reach it 138*cda5da8dSAndroid Build Coastguard Worker emit(ATOMIC_GROUP) 139*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 140*cda5da8dSAndroid Build Coastguard Worker _compile(code, av, flags) 141*cda5da8dSAndroid Build Coastguard Worker emit(SUCCESS) 142*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 143*cda5da8dSAndroid Build Coastguard Worker elif op in SUCCESS_CODES: 144*cda5da8dSAndroid Build Coastguard Worker emit(op) 145*cda5da8dSAndroid Build Coastguard Worker elif op in ASSERT_CODES: 
146*cda5da8dSAndroid Build Coastguard Worker emit(op) 147*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 148*cda5da8dSAndroid Build Coastguard Worker if av[0] >= 0: 149*cda5da8dSAndroid Build Coastguard Worker emit(0) # look ahead 150*cda5da8dSAndroid Build Coastguard Worker else: 151*cda5da8dSAndroid Build Coastguard Worker lo, hi = av[1].getwidth() 152*cda5da8dSAndroid Build Coastguard Worker if lo != hi: 153*cda5da8dSAndroid Build Coastguard Worker raise error("look-behind requires fixed-width pattern") 154*cda5da8dSAndroid Build Coastguard Worker emit(lo) # look behind 155*cda5da8dSAndroid Build Coastguard Worker _compile(code, av[1], flags) 156*cda5da8dSAndroid Build Coastguard Worker emit(SUCCESS) 157*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 158*cda5da8dSAndroid Build Coastguard Worker elif op is AT: 159*cda5da8dSAndroid Build Coastguard Worker emit(op) 160*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_MULTILINE: 161*cda5da8dSAndroid Build Coastguard Worker av = AT_MULTILINE.get(av, av) 162*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_LOCALE: 163*cda5da8dSAndroid Build Coastguard Worker av = AT_LOCALE.get(av, av) 164*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_UNICODE: 165*cda5da8dSAndroid Build Coastguard Worker av = AT_UNICODE.get(av, av) 166*cda5da8dSAndroid Build Coastguard Worker emit(av) 167*cda5da8dSAndroid Build Coastguard Worker elif op is BRANCH: 168*cda5da8dSAndroid Build Coastguard Worker emit(op) 169*cda5da8dSAndroid Build Coastguard Worker tail = [] 170*cda5da8dSAndroid Build Coastguard Worker tailappend = tail.append 171*cda5da8dSAndroid Build Coastguard Worker for av in av[1]: 172*cda5da8dSAndroid Build Coastguard Worker skip = _len(code); emit(0) 173*cda5da8dSAndroid Build Coastguard Worker # _compile_info(code, av, flags) 174*cda5da8dSAndroid Build Coastguard Worker _compile(code, av, flags) 175*cda5da8dSAndroid Build Coastguard Worker 
emit(JUMP) 176*cda5da8dSAndroid Build Coastguard Worker tailappend(_len(code)); emit(0) 177*cda5da8dSAndroid Build Coastguard Worker code[skip] = _len(code) - skip 178*cda5da8dSAndroid Build Coastguard Worker emit(FAILURE) # end of branch 179*cda5da8dSAndroid Build Coastguard Worker for tail in tail: 180*cda5da8dSAndroid Build Coastguard Worker code[tail] = _len(code) - tail 181*cda5da8dSAndroid Build Coastguard Worker elif op is CATEGORY: 182*cda5da8dSAndroid Build Coastguard Worker emit(op) 183*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_LOCALE: 184*cda5da8dSAndroid Build Coastguard Worker av = CH_LOCALE[av] 185*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_UNICODE: 186*cda5da8dSAndroid Build Coastguard Worker av = CH_UNICODE[av] 187*cda5da8dSAndroid Build Coastguard Worker emit(av) 188*cda5da8dSAndroid Build Coastguard Worker elif op is GROUPREF: 189*cda5da8dSAndroid Build Coastguard Worker if not flags & SRE_FLAG_IGNORECASE: 190*cda5da8dSAndroid Build Coastguard Worker emit(op) 191*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_LOCALE: 192*cda5da8dSAndroid Build Coastguard Worker emit(GROUPREF_LOC_IGNORE) 193*cda5da8dSAndroid Build Coastguard Worker elif not fixes: # ascii 194*cda5da8dSAndroid Build Coastguard Worker emit(GROUPREF_IGNORE) 195*cda5da8dSAndroid Build Coastguard Worker else: 196*cda5da8dSAndroid Build Coastguard Worker emit(GROUPREF_UNI_IGNORE) 197*cda5da8dSAndroid Build Coastguard Worker emit(av-1) 198*cda5da8dSAndroid Build Coastguard Worker elif op is GROUPREF_EXISTS: 199*cda5da8dSAndroid Build Coastguard Worker emit(op) 200*cda5da8dSAndroid Build Coastguard Worker emit(av[0]-1) 201*cda5da8dSAndroid Build Coastguard Worker skipyes = _len(code); emit(0) 202*cda5da8dSAndroid Build Coastguard Worker _compile(code, av[1], flags) 203*cda5da8dSAndroid Build Coastguard Worker if av[2]: 204*cda5da8dSAndroid Build Coastguard Worker emit(JUMP) 205*cda5da8dSAndroid Build Coastguard Worker skipno = 
_len(code); emit(0) 206*cda5da8dSAndroid Build Coastguard Worker code[skipyes] = _len(code) - skipyes + 1 207*cda5da8dSAndroid Build Coastguard Worker _compile(code, av[2], flags) 208*cda5da8dSAndroid Build Coastguard Worker code[skipno] = _len(code) - skipno 209*cda5da8dSAndroid Build Coastguard Worker else: 210*cda5da8dSAndroid Build Coastguard Worker code[skipyes] = _len(code) - skipyes + 1 211*cda5da8dSAndroid Build Coastguard Worker else: 212*cda5da8dSAndroid Build Coastguard Worker raise error("internal: unsupported operand type %r" % (op,)) 213*cda5da8dSAndroid Build Coastguard Worker 214*cda5da8dSAndroid Build Coastguard Workerdef _compile_charset(charset, flags, code): 215*cda5da8dSAndroid Build Coastguard Worker # compile charset subprogram 216*cda5da8dSAndroid Build Coastguard Worker emit = code.append 217*cda5da8dSAndroid Build Coastguard Worker for op, av in charset: 218*cda5da8dSAndroid Build Coastguard Worker emit(op) 219*cda5da8dSAndroid Build Coastguard Worker if op is NEGATE: 220*cda5da8dSAndroid Build Coastguard Worker pass 221*cda5da8dSAndroid Build Coastguard Worker elif op is LITERAL: 222*cda5da8dSAndroid Build Coastguard Worker emit(av) 223*cda5da8dSAndroid Build Coastguard Worker elif op is RANGE or op is RANGE_UNI_IGNORE: 224*cda5da8dSAndroid Build Coastguard Worker emit(av[0]) 225*cda5da8dSAndroid Build Coastguard Worker emit(av[1]) 226*cda5da8dSAndroid Build Coastguard Worker elif op is CHARSET: 227*cda5da8dSAndroid Build Coastguard Worker code.extend(av) 228*cda5da8dSAndroid Build Coastguard Worker elif op is BIGCHARSET: 229*cda5da8dSAndroid Build Coastguard Worker code.extend(av) 230*cda5da8dSAndroid Build Coastguard Worker elif op is CATEGORY: 231*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_LOCALE: 232*cda5da8dSAndroid Build Coastguard Worker emit(CH_LOCALE[av]) 233*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_UNICODE: 234*cda5da8dSAndroid Build Coastguard Worker emit(CH_UNICODE[av]) 
235*cda5da8dSAndroid Build Coastguard Worker else: 236*cda5da8dSAndroid Build Coastguard Worker emit(av) 237*cda5da8dSAndroid Build Coastguard Worker else: 238*cda5da8dSAndroid Build Coastguard Worker raise error("internal: unsupported set operator %r" % (op,)) 239*cda5da8dSAndroid Build Coastguard Worker emit(FAILURE) 240*cda5da8dSAndroid Build Coastguard Worker 241*cda5da8dSAndroid Build Coastguard Workerdef _optimize_charset(charset, iscased=None, fixup=None, fixes=None): 242*cda5da8dSAndroid Build Coastguard Worker # internal: optimize character set 243*cda5da8dSAndroid Build Coastguard Worker out = [] 244*cda5da8dSAndroid Build Coastguard Worker tail = [] 245*cda5da8dSAndroid Build Coastguard Worker charmap = bytearray(256) 246*cda5da8dSAndroid Build Coastguard Worker hascased = False 247*cda5da8dSAndroid Build Coastguard Worker for op, av in charset: 248*cda5da8dSAndroid Build Coastguard Worker while True: 249*cda5da8dSAndroid Build Coastguard Worker try: 250*cda5da8dSAndroid Build Coastguard Worker if op is LITERAL: 251*cda5da8dSAndroid Build Coastguard Worker if fixup: 252*cda5da8dSAndroid Build Coastguard Worker lo = fixup(av) 253*cda5da8dSAndroid Build Coastguard Worker charmap[lo] = 1 254*cda5da8dSAndroid Build Coastguard Worker if fixes and lo in fixes: 255*cda5da8dSAndroid Build Coastguard Worker for k in fixes[lo]: 256*cda5da8dSAndroid Build Coastguard Worker charmap[k] = 1 257*cda5da8dSAndroid Build Coastguard Worker if not hascased and iscased(av): 258*cda5da8dSAndroid Build Coastguard Worker hascased = True 259*cda5da8dSAndroid Build Coastguard Worker else: 260*cda5da8dSAndroid Build Coastguard Worker charmap[av] = 1 261*cda5da8dSAndroid Build Coastguard Worker elif op is RANGE: 262*cda5da8dSAndroid Build Coastguard Worker r = range(av[0], av[1]+1) 263*cda5da8dSAndroid Build Coastguard Worker if fixup: 264*cda5da8dSAndroid Build Coastguard Worker if fixes: 265*cda5da8dSAndroid Build Coastguard Worker for i in map(fixup, r): 266*cda5da8dSAndroid 
Build Coastguard Worker charmap[i] = 1 267*cda5da8dSAndroid Build Coastguard Worker if i in fixes: 268*cda5da8dSAndroid Build Coastguard Worker for k in fixes[i]: 269*cda5da8dSAndroid Build Coastguard Worker charmap[k] = 1 270*cda5da8dSAndroid Build Coastguard Worker else: 271*cda5da8dSAndroid Build Coastguard Worker for i in map(fixup, r): 272*cda5da8dSAndroid Build Coastguard Worker charmap[i] = 1 273*cda5da8dSAndroid Build Coastguard Worker if not hascased: 274*cda5da8dSAndroid Build Coastguard Worker hascased = any(map(iscased, r)) 275*cda5da8dSAndroid Build Coastguard Worker else: 276*cda5da8dSAndroid Build Coastguard Worker for i in r: 277*cda5da8dSAndroid Build Coastguard Worker charmap[i] = 1 278*cda5da8dSAndroid Build Coastguard Worker elif op is NEGATE: 279*cda5da8dSAndroid Build Coastguard Worker out.append((op, av)) 280*cda5da8dSAndroid Build Coastguard Worker else: 281*cda5da8dSAndroid Build Coastguard Worker tail.append((op, av)) 282*cda5da8dSAndroid Build Coastguard Worker except IndexError: 283*cda5da8dSAndroid Build Coastguard Worker if len(charmap) == 256: 284*cda5da8dSAndroid Build Coastguard Worker # character set contains non-UCS1 character codes 285*cda5da8dSAndroid Build Coastguard Worker charmap += b'\0' * 0xff00 286*cda5da8dSAndroid Build Coastguard Worker continue 287*cda5da8dSAndroid Build Coastguard Worker # Character set contains non-BMP character codes. 288*cda5da8dSAndroid Build Coastguard Worker # For range, all BMP characters in the range are already 289*cda5da8dSAndroid Build Coastguard Worker # proceeded. 
290*cda5da8dSAndroid Build Coastguard Worker if fixup: 291*cda5da8dSAndroid Build Coastguard Worker hascased = True 292*cda5da8dSAndroid Build Coastguard Worker # For now, IN_UNI_IGNORE+LITERAL and 293*cda5da8dSAndroid Build Coastguard Worker # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP 294*cda5da8dSAndroid Build Coastguard Worker # characters, because two characters (at least one of 295*cda5da8dSAndroid Build Coastguard Worker # which is not in the BMP) match case-insensitively 296*cda5da8dSAndroid Build Coastguard Worker # if and only if: 297*cda5da8dSAndroid Build Coastguard Worker # 1) c1.lower() == c2.lower() 298*cda5da8dSAndroid Build Coastguard Worker # 2) c1.lower() == c2 or c1.lower().upper() == c2 299*cda5da8dSAndroid Build Coastguard Worker # Also, both c.lower() and c.lower().upper() are single 300*cda5da8dSAndroid Build Coastguard Worker # characters for every non-BMP character. 301*cda5da8dSAndroid Build Coastguard Worker if op is RANGE: 302*cda5da8dSAndroid Build Coastguard Worker op = RANGE_UNI_IGNORE 303*cda5da8dSAndroid Build Coastguard Worker tail.append((op, av)) 304*cda5da8dSAndroid Build Coastguard Worker break 305*cda5da8dSAndroid Build Coastguard Worker 306*cda5da8dSAndroid Build Coastguard Worker # compress character map 307*cda5da8dSAndroid Build Coastguard Worker runs = [] 308*cda5da8dSAndroid Build Coastguard Worker q = 0 309*cda5da8dSAndroid Build Coastguard Worker while True: 310*cda5da8dSAndroid Build Coastguard Worker p = charmap.find(1, q) 311*cda5da8dSAndroid Build Coastguard Worker if p < 0: 312*cda5da8dSAndroid Build Coastguard Worker break 313*cda5da8dSAndroid Build Coastguard Worker if len(runs) >= 2: 314*cda5da8dSAndroid Build Coastguard Worker runs = None 315*cda5da8dSAndroid Build Coastguard Worker break 316*cda5da8dSAndroid Build Coastguard Worker q = charmap.find(0, p) 317*cda5da8dSAndroid Build Coastguard Worker if q < 0: 318*cda5da8dSAndroid Build Coastguard Worker runs.append((p, len(charmap))) 
319*cda5da8dSAndroid Build Coastguard Worker break 320*cda5da8dSAndroid Build Coastguard Worker runs.append((p, q)) 321*cda5da8dSAndroid Build Coastguard Worker if runs is not None: 322*cda5da8dSAndroid Build Coastguard Worker # use literal/range 323*cda5da8dSAndroid Build Coastguard Worker for p, q in runs: 324*cda5da8dSAndroid Build Coastguard Worker if q - p == 1: 325*cda5da8dSAndroid Build Coastguard Worker out.append((LITERAL, p)) 326*cda5da8dSAndroid Build Coastguard Worker else: 327*cda5da8dSAndroid Build Coastguard Worker out.append((RANGE, (p, q - 1))) 328*cda5da8dSAndroid Build Coastguard Worker out += tail 329*cda5da8dSAndroid Build Coastguard Worker # if the case was changed or new representation is more compact 330*cda5da8dSAndroid Build Coastguard Worker if hascased or len(out) < len(charset): 331*cda5da8dSAndroid Build Coastguard Worker return out, hascased 332*cda5da8dSAndroid Build Coastguard Worker # else original character set is good enough 333*cda5da8dSAndroid Build Coastguard Worker return charset, hascased 334*cda5da8dSAndroid Build Coastguard Worker 335*cda5da8dSAndroid Build Coastguard Worker # use bitmap 336*cda5da8dSAndroid Build Coastguard Worker if len(charmap) == 256: 337*cda5da8dSAndroid Build Coastguard Worker data = _mk_bitmap(charmap) 338*cda5da8dSAndroid Build Coastguard Worker out.append((CHARSET, data)) 339*cda5da8dSAndroid Build Coastguard Worker out += tail 340*cda5da8dSAndroid Build Coastguard Worker return out, hascased 341*cda5da8dSAndroid Build Coastguard Worker 342*cda5da8dSAndroid Build Coastguard Worker # To represent a big charset, first a bitmap of all characters in the 343*cda5da8dSAndroid Build Coastguard Worker # set is constructed. Then, this bitmap is sliced into chunks of 256 344*cda5da8dSAndroid Build Coastguard Worker # characters, duplicate chunks are eliminated, and each chunk is 345*cda5da8dSAndroid Build Coastguard Worker # given a number. 
In the compiled expression, the charset is 346*cda5da8dSAndroid Build Coastguard Worker # represented by a 32-bit word sequence, consisting of one word for 347*cda5da8dSAndroid Build Coastguard Worker # the number of different chunks, a sequence of 256 bytes (64 words) 348*cda5da8dSAndroid Build Coastguard Worker # of chunk numbers indexed by their original chunk position, and a 349*cda5da8dSAndroid Build Coastguard Worker # sequence of 256-bit chunks (8 words each). 350*cda5da8dSAndroid Build Coastguard Worker 351*cda5da8dSAndroid Build Coastguard Worker # Compression is normally good: in a typical charset, large ranges of 352*cda5da8dSAndroid Build Coastguard Worker # Unicode will be either completely excluded (e.g. if only cyrillic 353*cda5da8dSAndroid Build Coastguard Worker # letters are to be matched), or completely included (e.g. if large 354*cda5da8dSAndroid Build Coastguard Worker # subranges of Kanji match). These ranges will be represented by 355*cda5da8dSAndroid Build Coastguard Worker # chunks of all one-bits or all zero-bits. 356*cda5da8dSAndroid Build Coastguard Worker 357*cda5da8dSAndroid Build Coastguard Worker # Matching can be also done efficiently: the more significant byte of 358*cda5da8dSAndroid Build Coastguard Worker # the Unicode character is an index into the chunk number, and the 359*cda5da8dSAndroid Build Coastguard Worker # less significant byte is a bit index in the chunk (just like the 360*cda5da8dSAndroid Build Coastguard Worker # CHARSET matching). 
361*cda5da8dSAndroid Build Coastguard Worker 362*cda5da8dSAndroid Build Coastguard Worker charmap = bytes(charmap) # should be hashable 363*cda5da8dSAndroid Build Coastguard Worker comps = {} 364*cda5da8dSAndroid Build Coastguard Worker mapping = bytearray(256) 365*cda5da8dSAndroid Build Coastguard Worker block = 0 366*cda5da8dSAndroid Build Coastguard Worker data = bytearray() 367*cda5da8dSAndroid Build Coastguard Worker for i in range(0, 65536, 256): 368*cda5da8dSAndroid Build Coastguard Worker chunk = charmap[i: i + 256] 369*cda5da8dSAndroid Build Coastguard Worker if chunk in comps: 370*cda5da8dSAndroid Build Coastguard Worker mapping[i // 256] = comps[chunk] 371*cda5da8dSAndroid Build Coastguard Worker else: 372*cda5da8dSAndroid Build Coastguard Worker mapping[i // 256] = comps[chunk] = block 373*cda5da8dSAndroid Build Coastguard Worker block += 1 374*cda5da8dSAndroid Build Coastguard Worker data += chunk 375*cda5da8dSAndroid Build Coastguard Worker data = _mk_bitmap(data) 376*cda5da8dSAndroid Build Coastguard Worker data[0:0] = [block] + _bytes_to_codes(mapping) 377*cda5da8dSAndroid Build Coastguard Worker out.append((BIGCHARSET, data)) 378*cda5da8dSAndroid Build Coastguard Worker out += tail 379*cda5da8dSAndroid Build Coastguard Worker return out, hascased 380*cda5da8dSAndroid Build Coastguard Worker 381*cda5da8dSAndroid Build Coastguard Worker_CODEBITS = _sre.CODESIZE * 8 382*cda5da8dSAndroid Build Coastguard WorkerMAXCODE = (1 << _CODEBITS) - 1 383*cda5da8dSAndroid Build Coastguard Worker_BITS_TRANS = b'0' + b'1' * 255 384*cda5da8dSAndroid Build Coastguard Workerdef _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): 385*cda5da8dSAndroid Build Coastguard Worker s = bits.translate(_BITS_TRANS)[::-1] 386*cda5da8dSAndroid Build Coastguard Worker return [_int(s[i - _CODEBITS: i], 2) 387*cda5da8dSAndroid Build Coastguard Worker for i in range(len(s), 0, -_CODEBITS)] 388*cda5da8dSAndroid Build Coastguard Worker 389*cda5da8dSAndroid Build Coastguard Workerdef 
_bytes_to_codes(b): 390*cda5da8dSAndroid Build Coastguard Worker # Convert block indices to word array 391*cda5da8dSAndroid Build Coastguard Worker a = memoryview(b).cast('I') 392*cda5da8dSAndroid Build Coastguard Worker assert a.itemsize == _sre.CODESIZE 393*cda5da8dSAndroid Build Coastguard Worker assert len(a) * a.itemsize == len(b) 394*cda5da8dSAndroid Build Coastguard Worker return a.tolist() 395*cda5da8dSAndroid Build Coastguard Worker 396*cda5da8dSAndroid Build Coastguard Workerdef _simple(p): 397*cda5da8dSAndroid Build Coastguard Worker # check if this subpattern is a "simple" operator 398*cda5da8dSAndroid Build Coastguard Worker if len(p) != 1: 399*cda5da8dSAndroid Build Coastguard Worker return False 400*cda5da8dSAndroid Build Coastguard Worker op, av = p[0] 401*cda5da8dSAndroid Build Coastguard Worker if op is SUBPATTERN: 402*cda5da8dSAndroid Build Coastguard Worker return av[0] is None and _simple(av[-1]) 403*cda5da8dSAndroid Build Coastguard Worker return op in _UNIT_CODES 404*cda5da8dSAndroid Build Coastguard Worker 405*cda5da8dSAndroid Build Coastguard Workerdef _generate_overlap_table(prefix): 406*cda5da8dSAndroid Build Coastguard Worker """ 407*cda5da8dSAndroid Build Coastguard Worker Generate an overlap table for the following prefix. 408*cda5da8dSAndroid Build Coastguard Worker An overlap table is a table of the same size as the prefix which 409*cda5da8dSAndroid Build Coastguard Worker informs about the potential self-overlap for each index in the prefix: 410*cda5da8dSAndroid Build Coastguard Worker - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...] 
411*cda5da8dSAndroid Build Coastguard Worker - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with 412*cda5da8dSAndroid Build Coastguard Worker prefix[0:k] 413*cda5da8dSAndroid Build Coastguard Worker """ 414*cda5da8dSAndroid Build Coastguard Worker table = [0] * len(prefix) 415*cda5da8dSAndroid Build Coastguard Worker for i in range(1, len(prefix)): 416*cda5da8dSAndroid Build Coastguard Worker idx = table[i - 1] 417*cda5da8dSAndroid Build Coastguard Worker while prefix[i] != prefix[idx]: 418*cda5da8dSAndroid Build Coastguard Worker if idx == 0: 419*cda5da8dSAndroid Build Coastguard Worker table[i] = 0 420*cda5da8dSAndroid Build Coastguard Worker break 421*cda5da8dSAndroid Build Coastguard Worker idx = table[idx - 1] 422*cda5da8dSAndroid Build Coastguard Worker else: 423*cda5da8dSAndroid Build Coastguard Worker table[i] = idx + 1 424*cda5da8dSAndroid Build Coastguard Worker return table 425*cda5da8dSAndroid Build Coastguard Worker 426*cda5da8dSAndroid Build Coastguard Workerdef _get_iscased(flags): 427*cda5da8dSAndroid Build Coastguard Worker if not flags & SRE_FLAG_IGNORECASE: 428*cda5da8dSAndroid Build Coastguard Worker return None 429*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_UNICODE: 430*cda5da8dSAndroid Build Coastguard Worker return _sre.unicode_iscased 431*cda5da8dSAndroid Build Coastguard Worker else: 432*cda5da8dSAndroid Build Coastguard Worker return _sre.ascii_iscased 433*cda5da8dSAndroid Build Coastguard Worker 434*cda5da8dSAndroid Build Coastguard Workerdef _get_literal_prefix(pattern, flags): 435*cda5da8dSAndroid Build Coastguard Worker # look for literal prefix 436*cda5da8dSAndroid Build Coastguard Worker prefix = [] 437*cda5da8dSAndroid Build Coastguard Worker prefixappend = prefix.append 438*cda5da8dSAndroid Build Coastguard Worker prefix_skip = None 439*cda5da8dSAndroid Build Coastguard Worker iscased = _get_iscased(flags) 440*cda5da8dSAndroid Build Coastguard Worker for op, av in pattern.data: 
441*cda5da8dSAndroid Build Coastguard Worker if op is LITERAL: 442*cda5da8dSAndroid Build Coastguard Worker if iscased and iscased(av): 443*cda5da8dSAndroid Build Coastguard Worker break 444*cda5da8dSAndroid Build Coastguard Worker prefixappend(av) 445*cda5da8dSAndroid Build Coastguard Worker elif op is SUBPATTERN: 446*cda5da8dSAndroid Build Coastguard Worker group, add_flags, del_flags, p = av 447*cda5da8dSAndroid Build Coastguard Worker flags1 = _combine_flags(flags, add_flags, del_flags) 448*cda5da8dSAndroid Build Coastguard Worker if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE: 449*cda5da8dSAndroid Build Coastguard Worker break 450*cda5da8dSAndroid Build Coastguard Worker prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1) 451*cda5da8dSAndroid Build Coastguard Worker if prefix_skip is None: 452*cda5da8dSAndroid Build Coastguard Worker if group is not None: 453*cda5da8dSAndroid Build Coastguard Worker prefix_skip = len(prefix) 454*cda5da8dSAndroid Build Coastguard Worker elif prefix_skip1 is not None: 455*cda5da8dSAndroid Build Coastguard Worker prefix_skip = len(prefix) + prefix_skip1 456*cda5da8dSAndroid Build Coastguard Worker prefix.extend(prefix1) 457*cda5da8dSAndroid Build Coastguard Worker if not got_all: 458*cda5da8dSAndroid Build Coastguard Worker break 459*cda5da8dSAndroid Build Coastguard Worker else: 460*cda5da8dSAndroid Build Coastguard Worker break 461*cda5da8dSAndroid Build Coastguard Worker else: 462*cda5da8dSAndroid Build Coastguard Worker return prefix, prefix_skip, True 463*cda5da8dSAndroid Build Coastguard Worker return prefix, prefix_skip, False 464*cda5da8dSAndroid Build Coastguard Worker 465*cda5da8dSAndroid Build Coastguard Workerdef _get_charset_prefix(pattern, flags): 466*cda5da8dSAndroid Build Coastguard Worker while True: 467*cda5da8dSAndroid Build Coastguard Worker if not pattern.data: 468*cda5da8dSAndroid Build Coastguard Worker return None 469*cda5da8dSAndroid Build Coastguard Worker op, av = 
pattern.data[0] 470*cda5da8dSAndroid Build Coastguard Worker if op is not SUBPATTERN: 471*cda5da8dSAndroid Build Coastguard Worker break 472*cda5da8dSAndroid Build Coastguard Worker group, add_flags, del_flags, pattern = av 473*cda5da8dSAndroid Build Coastguard Worker flags = _combine_flags(flags, add_flags, del_flags) 474*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: 475*cda5da8dSAndroid Build Coastguard Worker return None 476*cda5da8dSAndroid Build Coastguard Worker 477*cda5da8dSAndroid Build Coastguard Worker iscased = _get_iscased(flags) 478*cda5da8dSAndroid Build Coastguard Worker if op is LITERAL: 479*cda5da8dSAndroid Build Coastguard Worker if iscased and iscased(av): 480*cda5da8dSAndroid Build Coastguard Worker return None 481*cda5da8dSAndroid Build Coastguard Worker return [(op, av)] 482*cda5da8dSAndroid Build Coastguard Worker elif op is BRANCH: 483*cda5da8dSAndroid Build Coastguard Worker charset = [] 484*cda5da8dSAndroid Build Coastguard Worker charsetappend = charset.append 485*cda5da8dSAndroid Build Coastguard Worker for p in av[1]: 486*cda5da8dSAndroid Build Coastguard Worker if not p: 487*cda5da8dSAndroid Build Coastguard Worker return None 488*cda5da8dSAndroid Build Coastguard Worker op, av = p[0] 489*cda5da8dSAndroid Build Coastguard Worker if op is LITERAL and not (iscased and iscased(av)): 490*cda5da8dSAndroid Build Coastguard Worker charsetappend((op, av)) 491*cda5da8dSAndroid Build Coastguard Worker else: 492*cda5da8dSAndroid Build Coastguard Worker return None 493*cda5da8dSAndroid Build Coastguard Worker return charset 494*cda5da8dSAndroid Build Coastguard Worker elif op is IN: 495*cda5da8dSAndroid Build Coastguard Worker charset = av 496*cda5da8dSAndroid Build Coastguard Worker if iscased: 497*cda5da8dSAndroid Build Coastguard Worker for op, av in charset: 498*cda5da8dSAndroid Build Coastguard Worker if op is LITERAL: 499*cda5da8dSAndroid Build Coastguard Worker if iscased(av): 
500*cda5da8dSAndroid Build Coastguard Worker return None 501*cda5da8dSAndroid Build Coastguard Worker elif op is RANGE: 502*cda5da8dSAndroid Build Coastguard Worker if av[1] > 0xffff: 503*cda5da8dSAndroid Build Coastguard Worker return None 504*cda5da8dSAndroid Build Coastguard Worker if any(map(iscased, range(av[0], av[1]+1))): 505*cda5da8dSAndroid Build Coastguard Worker return None 506*cda5da8dSAndroid Build Coastguard Worker return charset 507*cda5da8dSAndroid Build Coastguard Worker return None 508*cda5da8dSAndroid Build Coastguard Worker 509*cda5da8dSAndroid Build Coastguard Workerdef _compile_info(code, pattern, flags): 510*cda5da8dSAndroid Build Coastguard Worker # internal: compile an info block. in the current version, 511*cda5da8dSAndroid Build Coastguard Worker # this contains min/max pattern width, and an optional literal 512*cda5da8dSAndroid Build Coastguard Worker # prefix or a character map 513*cda5da8dSAndroid Build Coastguard Worker lo, hi = pattern.getwidth() 514*cda5da8dSAndroid Build Coastguard Worker if hi > MAXCODE: 515*cda5da8dSAndroid Build Coastguard Worker hi = MAXCODE 516*cda5da8dSAndroid Build Coastguard Worker if lo == 0: 517*cda5da8dSAndroid Build Coastguard Worker code.extend([INFO, 4, 0, lo, hi]) 518*cda5da8dSAndroid Build Coastguard Worker return 519*cda5da8dSAndroid Build Coastguard Worker # look for a literal prefix 520*cda5da8dSAndroid Build Coastguard Worker prefix = [] 521*cda5da8dSAndroid Build Coastguard Worker prefix_skip = 0 522*cda5da8dSAndroid Build Coastguard Worker charset = [] # not used 523*cda5da8dSAndroid Build Coastguard Worker if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE): 524*cda5da8dSAndroid Build Coastguard Worker # look for literal prefix 525*cda5da8dSAndroid Build Coastguard Worker prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags) 526*cda5da8dSAndroid Build Coastguard Worker # if no prefix, look for charset prefix 527*cda5da8dSAndroid Build Coastguard Worker if not prefix: 
528*cda5da8dSAndroid Build Coastguard Worker charset = _get_charset_prefix(pattern, flags) 529*cda5da8dSAndroid Build Coastguard Worker## if prefix: 530*cda5da8dSAndroid Build Coastguard Worker## print("*** PREFIX", prefix, prefix_skip) 531*cda5da8dSAndroid Build Coastguard Worker## if charset: 532*cda5da8dSAndroid Build Coastguard Worker## print("*** CHARSET", charset) 533*cda5da8dSAndroid Build Coastguard Worker # add an info block 534*cda5da8dSAndroid Build Coastguard Worker emit = code.append 535*cda5da8dSAndroid Build Coastguard Worker emit(INFO) 536*cda5da8dSAndroid Build Coastguard Worker skip = len(code); emit(0) 537*cda5da8dSAndroid Build Coastguard Worker # literal flag 538*cda5da8dSAndroid Build Coastguard Worker mask = 0 539*cda5da8dSAndroid Build Coastguard Worker if prefix: 540*cda5da8dSAndroid Build Coastguard Worker mask = SRE_INFO_PREFIX 541*cda5da8dSAndroid Build Coastguard Worker if prefix_skip is None and got_all: 542*cda5da8dSAndroid Build Coastguard Worker mask = mask | SRE_INFO_LITERAL 543*cda5da8dSAndroid Build Coastguard Worker elif charset: 544*cda5da8dSAndroid Build Coastguard Worker mask = mask | SRE_INFO_CHARSET 545*cda5da8dSAndroid Build Coastguard Worker emit(mask) 546*cda5da8dSAndroid Build Coastguard Worker # pattern length 547*cda5da8dSAndroid Build Coastguard Worker if lo < MAXCODE: 548*cda5da8dSAndroid Build Coastguard Worker emit(lo) 549*cda5da8dSAndroid Build Coastguard Worker else: 550*cda5da8dSAndroid Build Coastguard Worker emit(MAXCODE) 551*cda5da8dSAndroid Build Coastguard Worker prefix = prefix[:MAXCODE] 552*cda5da8dSAndroid Build Coastguard Worker emit(min(hi, MAXCODE)) 553*cda5da8dSAndroid Build Coastguard Worker # add literal prefix 554*cda5da8dSAndroid Build Coastguard Worker if prefix: 555*cda5da8dSAndroid Build Coastguard Worker emit(len(prefix)) # length 556*cda5da8dSAndroid Build Coastguard Worker if prefix_skip is None: 557*cda5da8dSAndroid Build Coastguard Worker prefix_skip = len(prefix) 558*cda5da8dSAndroid 
Build Coastguard Worker emit(prefix_skip) # skip 559*cda5da8dSAndroid Build Coastguard Worker code.extend(prefix) 560*cda5da8dSAndroid Build Coastguard Worker # generate overlap table 561*cda5da8dSAndroid Build Coastguard Worker code.extend(_generate_overlap_table(prefix)) 562*cda5da8dSAndroid Build Coastguard Worker elif charset: 563*cda5da8dSAndroid Build Coastguard Worker charset, hascased = _optimize_charset(charset) 564*cda5da8dSAndroid Build Coastguard Worker assert not hascased 565*cda5da8dSAndroid Build Coastguard Worker _compile_charset(charset, flags, code) 566*cda5da8dSAndroid Build Coastguard Worker code[skip] = len(code) - skip 567*cda5da8dSAndroid Build Coastguard Worker 568*cda5da8dSAndroid Build Coastguard Workerdef isstring(obj): 569*cda5da8dSAndroid Build Coastguard Worker return isinstance(obj, (str, bytes)) 570*cda5da8dSAndroid Build Coastguard Worker 571*cda5da8dSAndroid Build Coastguard Workerdef _code(p, flags): 572*cda5da8dSAndroid Build Coastguard Worker 573*cda5da8dSAndroid Build Coastguard Worker flags = p.state.flags | flags 574*cda5da8dSAndroid Build Coastguard Worker code = [] 575*cda5da8dSAndroid Build Coastguard Worker 576*cda5da8dSAndroid Build Coastguard Worker # compile info block 577*cda5da8dSAndroid Build Coastguard Worker _compile_info(code, p, flags) 578*cda5da8dSAndroid Build Coastguard Worker 579*cda5da8dSAndroid Build Coastguard Worker # compile the pattern 580*cda5da8dSAndroid Build Coastguard Worker _compile(code, p.data, flags) 581*cda5da8dSAndroid Build Coastguard Worker 582*cda5da8dSAndroid Build Coastguard Worker code.append(SUCCESS) 583*cda5da8dSAndroid Build Coastguard Worker 584*cda5da8dSAndroid Build Coastguard Worker return code 585*cda5da8dSAndroid Build Coastguard Worker 586*cda5da8dSAndroid Build Coastguard Workerdef _hex_code(code): 587*cda5da8dSAndroid Build Coastguard Worker return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code) 588*cda5da8dSAndroid Build Coastguard Worker 
def dis(code):
    """Print a human-readable disassembly of compiled pattern *code*.

    *code* is the flat list of integer code words built by ``_code()``.
    Every instruction is written to stdout on its own row:

        <offset><mark> <indent><OPCODE> <operands> [(to N)]

    ``<mark>`` is ``:`` when a previously printed instruction announced a
    jump to this offset and ``.`` otherwise.  ``(to N)`` is the absolute
    offset an instruction's skip word leads to.  Code enclosed by an
    instruction is disassembled one nesting level deeper.

    Raises ValueError if an opcode is met that this function cannot decode.
    """
    import sys

    labels = set()      # offsets announced as jump targets so far
    level = 0           # nesting depth; dis_() raises it on entry
    offset_width = len(str(len(code) - 1))

    def dis_(start, end):
        def print_(*args, to=None):
            # Opcode row: offset column, label mark, nesting indent, args.
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            # Two columns per nesting level -- the same unit print_2() pads
            # with -- so continuation rows keep lining up at any depth.
            print('%*d%s ' % (offset_width, start, ':' if start in labels else '.'),
                  end='  '*(level-1))
            print(*args)

        def print_2(*args):
            # Continuation row: no offset column, padding only.
            print(end=' '*(offset_width + 2*level))
            print(*args)

        nonlocal level
        level += 1
        i = start
        while i < end:
            # 'start' is what print_() shows as the row's offset.
            start = i
            op = OPCODES[code[i]]
            i += 1
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                # No operands.
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_UNI_IGNORE, NOT_LITERAL_UNI_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                # One operand, shown both as hex and as a character.
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])
            elif op is CATEGORY:
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])
            elif op in (IN, IN_IGNORE, IN_UNI_IGNORE, IN_LOC_IGNORE):
                # The skip word is measured from its own offset.
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op in (RANGE, RANGE_UNI_IGNORE):
                lo, hi = code[i: i+2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                # A 256-bit map stored in 256//_CODEBITS code words.
                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
                i += 256//_CODEBITS
            elif op is BIGCHARSET:
                # Operand = number of bitmap chunks; then a 256-byte table
                # packed into code words, then the chunks themselves.
                arg = code[i]
                i += 1
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + 256//_sre.CODESIZE]))
                print_(op, arg, mapping)
                i += 256//_sre.CODESIZE
                level += 1
                for _ in range(arg):
                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
                    i += 256//_CODEBITS
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE, GROUPREF_UNI_IGNORE,
                        GROUPREF_LOC_IGNORE):
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                skip = code[i]
                print_(op, skip, to=i+skip)
                i += 1
            elif op is BRANCH:
                # A chain of alternatives, each led by a skip word; a zero
                # skip word ends the chain.
                skip = code[i]
                print_(op, skip, to=i+skip)
                while skip:
                    dis_(i+1, i+skip)
                    i += skip
                    start = i
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i+skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
                        POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
                skip, rep_min, rep_max = code[i: i+3]
                if rep_max == MAXREPEAT:
                    rep_max = 'MAXREPEAT'
                print_(op, skip, rep_min, rep_max, to=i+skip)
                dis_(i+3, i+skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                # The target is computed from the offset of the first operand.
                arg, skip = code[i: i+2]
                print_(op, arg, skip, to=i+skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                skip, arg = code[i: i+2]
                print_(op, skip, arg, to=i+skip)
                dis_(i+2, i+skip)
                i += skip
            elif op is ATOMIC_GROUP:
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op is INFO:
                skip, flags, min_width, max_width = code[i: i+4]
                if max_width == MAXREPEAT:
                    max_width = 'MAXREPEAT'
                print_(op, skip, bin(flags), min_width, max_width, to=i+skip)
                # 'pos' walks over the optional parts of the info block.
                pos = i+4
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[i+4: i+6]
                    print_2(' prefix_skip', prefix_skip)
                    pos = i + 6
                    prefix = code[pos: pos+prefix_len]
                    print_2(' prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    pos += prefix_len
                    # The overlap table has one entry per prefix item.
                    print_2(' overlap', code[pos: pos+prefix_len])
                    pos += prefix_len
                if flags & SRE_INFO_CHARSET:
                    level += 1
                    print_2('in')
                    dis_(pos, i+skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))
def compile(p, flags=0):
    """Build a compiled pattern object from *p*.

    *p* is either a pattern string (``str`` or ``bytes``), which is parsed
    first, or an already parsed pattern, which is used as is.  In the second
    case the object is created with ``None`` as its source pattern.

    If ``SRE_FLAG_DEBUG`` is set in *flags*, a blank line followed by a
    disassembly of the generated code is printed before the object is
    created.
    """
    # internal: convert pattern list to internal format

    source = None
    if isstring(p):
        source, p = p, _parser.parse(p, flags)

    code = _code(p, flags)
    if flags & SRE_FLAG_DEBUG:
        print()
        dis(code)

    # map in either direction
    state = p.state
    name_to_index = state.groupdict
    index_to_name = [None] * state.groups
    for name, index in name_to_index.items():
        index_to_name[index] = name

    return _sre.compile(
        source,
        flags | state.flags,
        code,
        state.groups - 1,
        name_to_index,
        tuple(index_to_name),
    )