The Ply lexer, which does not appear to be an actively maintained project, wraps the regular expressions from grammar definitions in named groups of its own. This breaks re.compile in Python >= 3.11 when the original expressions contain global flags, because the compiler requires that global flags appear at the start of the expression rather than inside the named group. This patch wraps re.compile to scan the expression for any global flags and, when any are found, moves them to the start of the expression. --- a/third_party/python/ply/ply/lex.py +++ b/third_party/python/ply/ply/lex.py @@ -49,6 +49,37 @@ # Python 3.0 StringTypes = (str, bytes) + +def _re_compile(expression, *args, **kwargs): + ''' + Rearrange global flags in the regular expression to appear at the + beginning, avoiding deprecation warnings on Python < 3.11 and hard + errors on Python >= 3.11. + ''' + flags = set() + remainder = '' + + pattern = re.compile(r'\(\?([aiLmsux]+)\)') + while m := pattern.search(expression): + # Location of the global flag spec + l, h = m.span(0) + # Accumulate global flags from this spec + flags.update(m.group(1)) + # Capture all text leading up to the match + remainder += expression[:l] + # Trim to the end of the flag spec + expression = expression[h:] + + # Any remaining expression contains no flags + remainder += expression + + # If there are flags, they belong at the beginning + if flags: + remainder = f'(?{"".join(sorted(flags))})' + remainder + + return re.compile(remainder, *args, **kwargs) + + # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') @@ -230,7 +261,7 @@ titem = [] txtitem = [] for pat, func_name in lre: - titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + titem.append((_re_compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) self.lexstatere[statename] = titem self.lexstateretext[statename] = txtitem @@ -495,7 +526,7 @@ return [] regex = '|'.join(relist) 
try: - lexre = re.compile(regex, reflags) + lexre = _re_compile(regex, reflags) # Build the index to function map for the matching engine lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) @@ -758,7 +789,7 @@ continue try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + c = _re_compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) if c.match(''): self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) self.error = True @@ -782,7 +813,7 @@ continue try: - c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + c = _re_compile('(?P<%s>%s)' % (name, r), self.reflags) if (c.match('')): self.log.error("Regular expression for rule '%s' matches empty string", name) self.error = True