-
-
Notifications
You must be signed in to change notification settings - Fork 351
Subst.py: fix bugs and improve substitution performance #4867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
5f525f0
50627a6
5db2529
89084fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
|
|
||
| import re | ||
| from collections import UserList, UserString | ||
| from functools import lru_cache | ||
| from inspect import signature, Parameter | ||
|
|
||
| import SCons.Errors | ||
|
|
@@ -45,7 +46,7 @@ | |
|
|
||
| def SetAllowableExceptions(*excepts) -> None: | ||
| global AllowableExceptions | ||
| AllowableExceptions = [_f for _f in excepts if _f] | ||
| AllowableExceptions = tuple(_f for _f in excepts if _f) | ||
|
|
||
|
|
||
| def raise_exception(exception, target, s): | ||
|
|
@@ -84,9 +85,6 @@ def __eq__(self, other) -> bool: | |
| return False | ||
| return self.lstr == other.lstr | ||
|
|
||
| def __neq__(self, other) -> bool: | ||
| return not self.__eq__(other) | ||
|
|
||
| def __hash__(self) -> int: | ||
| return hash(self.lstr) | ||
|
|
||
|
|
@@ -340,6 +338,49 @@ def get_src_subst_proxy(node): | |
|
|
||
| _callable_args_set = {'target', 'source', 'env', 'for_signature'} | ||
|
|
||
|
|
||
| def _check_callable_subst_args(s) -> bool: | ||
| """Check if callable *s* matches the subst calling convention. | ||
|
|
||
| A callable construction variable must be callable with exactly the | ||
| arguments (target, source, env, for_signature); any additional | ||
| parameters must have default values (which also allows | ||
| functools.partial objects to work). | ||
| """ | ||
| try: | ||
| params = signature(s).parameters.items() | ||
| except (ValueError, TypeError): | ||
| # signature() can fail on some C/builtin callables; treat them | ||
| # as not matching our calling convention. | ||
| return False | ||
| return { | ||
| k for k, v in params if k in _callable_args_set or v.default is Parameter.empty | ||
| } == _callable_args_set | ||
|
|
||
|
|
||
| _callable_subst_args_cache: dict = {} | ||
|
|
||
|
|
||
| def _callable_matches_subst_args(s) -> bool: | ||
| """Cached version of :func:`_check_callable_subst_args`. | ||
|
|
||
| Inspecting a signature is expensive and the same callables are | ||
| expanded over and over (once or more per target), so cache the | ||
| result per callable. The cache holds strong references, but these | ||
| callables are construction-variable values which normally live as | ||
| long as the build anyway. | ||
| """ | ||
| try: | ||
| return _callable_subst_args_cache[s] | ||
| except KeyError: | ||
| pass | ||
| except TypeError: | ||
| # unhashable callable: do the check each time | ||
| return _check_callable_subst_args(s) | ||
| result = _callable_subst_args_cache[s] = _check_callable_subst_args(s) | ||
| return result | ||
|
|
||
|
|
||
| class StringSubber: | ||
| """A class to construct the results of a scons_subst() call. | ||
|
|
||
|
|
@@ -382,9 +423,8 @@ def expand(self, s, lvars): | |
| return s | ||
| else: | ||
| key = s[1:] | ||
| if key[0] == '{' or '.' in key: | ||
| if key[0] == '{': | ||
| key = key[1:-1] | ||
| if key[0] == '{': | ||
| key = key[1:-1] | ||
|
|
||
| # Store for error messages if we fail to expand the | ||
| # value | ||
|
|
@@ -409,20 +449,18 @@ def expand(self, s, lvars): | |
| elif s is None: | ||
| return '' | ||
|
|
||
| # A plain string with no more expansions needs no | ||
| # further processing, so skip the copy/recursion below. | ||
| if isinstance(s, str) and '$' not in s: | ||
| return s | ||
|
|
||
| # Before re-expanding the result, handle | ||
| # recursive expansion by copying the local | ||
| # variable dictionary and overwriting a null | ||
| # string for the value of the variable name | ||
| # we just expanded. | ||
| # | ||
| # This could potentially be optimized by only | ||
| # copying lvars when s contains more expansions, | ||
| # but lvars is usually supposed to be pretty | ||
| # small, and deeply nested variable expansions | ||
| # are probably more the exception than the norm, | ||
| # so it should be tolerable for now. | ||
| lv = lvars.copy() | ||
| var = key.split('.')[0] | ||
| var = key.partition('.')[0] | ||
|
mwichmann marked this conversation as resolved.
|
||
| lv[var] = '' | ||
| return self.substitute(s, lv) | ||
| elif is_Sequence(s): | ||
|
|
@@ -436,8 +474,7 @@ def func(l, conv=self.conv, substitute=self.substitute, lvars=lvars): | |
| # string if called on, so we make an exception in this condition for Null class | ||
| # Also allow callables where the only non default valued args match the expected defaults | ||
| # this should also allow functools.partial's to work. | ||
| if isinstance(s, SCons.Util.Null) or {k for k, v in signature(s).parameters.items() if | ||
| k in _callable_args_set or v.default == Parameter.empty} == _callable_args_set: | ||
| if isinstance(s, SCons.Util.Null) or _callable_matches_subst_args(s): | ||
|
|
||
| s = s(target=lvars['TARGETS'], | ||
| source=lvars['SOURCES'], | ||
|
|
@@ -526,12 +563,19 @@ def expanded(self, s) -> bool: | |
| method is used to determine if a string is already fully | ||
| expanded and if so exit the loop early to prevent these | ||
| recursive calls. | ||
|
|
||
| Only a string without any ``$`` (no further expansion) and | ||
| without any whitespace (no word-splitting needed) can safely | ||
| be appended directly as a single word. | ||
| """ | ||
| if not is_String(s) or isinstance(s, CmdStringHolder): | ||
| return False | ||
|
|
||
| s = str(s) # in case it's a UserString | ||
| return _separate_args.findall(s) is None | ||
| if not s: | ||
| # an empty string must not be appended as an empty word | ||
| return False | ||
| return _unexpandable.search(s) is None | ||
|
|
||
| def expand(self, s, lvars, within_list): | ||
| """Expand a single "token" as necessary, appending the | ||
|
|
@@ -561,9 +605,8 @@ def expand(self, s, lvars, within_list): | |
| self.close_strip('$)') | ||
| else: | ||
| key = s[1:] | ||
| if key.startswith('{') or '.' in key: | ||
| if key.startswith('{'): | ||
| key = key[1:-1] | ||
| if key[0] == '{': | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why get rid of startswith? And we don't need the attribute-access check?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment as above about it's more typing? Not exactly sure, but is there any reason to use startswith instead of just checking the 1 character for a single character?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. functionally no; readability-wise I think yes. I hate using indexing-slicing with just "magic numbers"; we can't avoid them with node lists where stuff[0] is magical and doesn't have an alternative, but I do like starts/endswith on strings. Not a big deal, though. |
||
| key = key[1:-1] | ||
|
|
||
| # Store for error messages if we fail to expand the | ||
| # value | ||
|
|
@@ -584,7 +627,7 @@ def expand(self, s, lvars, within_list): | |
| raise_exception(e, lvars['TARGETS'], old_s) | ||
|
|
||
| if s is None and NameError not in AllowableExceptions: | ||
| raise_exception(NameError(), lvars['TARGETS'], old_s) | ||
| raise_exception(NameError(key), lvars['TARGETS'], old_s) | ||
| elif s is None: | ||
| return | ||
|
|
||
|
|
@@ -600,7 +643,7 @@ def expand(self, s, lvars, within_list): | |
| # string for the value of the variable name | ||
| # we just expanded. | ||
| lv = lvars.copy() | ||
| var = key.split('.')[0] | ||
| var = key.partition('.')[0] | ||
| lv[var] = '' | ||
| self.substitute(s, lv, 0) | ||
| self.this_word() | ||
|
|
@@ -614,8 +657,7 @@ def expand(self, s, lvars, within_list): | |
| # string if called on, so we make an exception in this condition for Null class | ||
| # Also allow callables where the only non default valued args match the expected defaults | ||
| # this should also allow functools.partial's to work. | ||
| if isinstance(s, SCons.Util.Null) or {k for k, v in signature(s).parameters.items() if | ||
| k in _callable_args_set or v.default == Parameter.empty} == _callable_args_set: | ||
| if isinstance(s, SCons.Util.Null) or _callable_matches_subst_args(s): | ||
|
|
||
| s = s(target=lvars['TARGETS'], | ||
| source=lvars['SOURCES'], | ||
|
|
@@ -815,12 +857,16 @@ def _remove_list(list): | |
| _dollar_exps = re.compile(r'(%s)' % _dollar_exps_str) | ||
| _separate_args = re.compile(r'(%s|\s+|[^\s$]+|\$)' % _dollar_exps_str) | ||
|
|
||
| # Matches strings which may need further expansion ('$') or | ||
| # word-splitting (whitespace); see ListSubber.expanded(). | ||
| _unexpandable = re.compile(r'[\s$]') | ||
|
|
||
| # This regular expression is used to replace strings of multiple white | ||
| # space characters in the string result from the scons_subst() function. | ||
| _space_sep = re.compile(r'[\t ]+(?![^{]*})') | ||
|
|
||
|
|
||
| def scons_subst(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars={}, lvars={}, conv=None, overrides: dict | None = None): | ||
| def scons_subst(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars=None, lvars=None, conv=None, overrides: dict | None = None): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a long-standing checker flag - mutable default arg. Using None as a sentinel and then adding checks is the usual solution - we've changed this in a few other places. Is there an actual benefit to the change? We don't change gvars (well, we change it temporarily).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was an error condition where gvars or lvars could get set and not cleared and then cause issues on next call. (see the notes I posted in discord?) |
||
| """Expand a string or list containing construction variable | ||
| substitutions. | ||
|
|
||
|
|
@@ -832,6 +878,11 @@ def scons_subst(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars={ | |
| if (isinstance(strSubst, str) and '$' not in strSubst) or isinstance(strSubst, CmdStringHolder): | ||
| return strSubst | ||
|
|
||
| if gvars is None: | ||
| gvars = {} | ||
| if lvars is None: | ||
| lvars = {} | ||
|
|
||
| if conv is None: | ||
| conv = _strconv[mode] | ||
|
|
||
|
|
@@ -850,25 +901,28 @@ def scons_subst(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars={ | |
| lvars = lvars.copy() | ||
| lvars.update(d) | ||
|
|
||
| # Allow last ditch chance to override lvars | ||
| # Allow last ditch chance to override lvars; don't mutate the | ||
| # caller's dictionary doing so. | ||
| if overrides: | ||
| lvars.update(overrides) | ||
| lvars = {**lvars, **overrides} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe there's a way to restruture this plus the above - because in at least one flow we could now remake lvars twice - once from the copy, then here from the repacking to account for the override.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's a proposed rejig for this: # Build any special TARGET/SOURCE vars and apply overrides.
# Only copy the caller's lvars once if we need to modify it.
d = {}
if 'TARGET' not in lvars:
d = subst_dict(target, source)
if d or overrides:
lvars = {**lvars, **d, **overrides}If this approach seems okay, I can make a new PR once this one is resolved - since this is essentially a new request let's not muddy this one. There's a small tweak needed in ActionTests in any case, the inconsistency there doesn't break until this proposed change makes it stricter that overrides needs to be a dict.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AI says
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I know it needs more scaffolding at the top. I didn't say it was a complete PR, though it works in my copy here.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, didn't read this all. I'd rather put the overrides stuff at the top of the function, though this works as well. With the other entry-point checks: There's a small hange needed to correct an error in |
||
|
|
||
| # We're (most likely) going to eval() things. If Python doesn't | ||
| # find a __builtins__ value in the global dictionary used for eval(), | ||
| # it copies the current global values for you. Avoid this by | ||
| # setting it explicitly and then deleting, so we don't pollute the | ||
| # construction environment Dictionary(ies) that are typically used | ||
| # for expansion. | ||
| # for expansion. gvars is usually the live env dict, so make sure | ||
| # the deletion happens even if substitution raises. | ||
| gvars['__builtins__'] = __builtins__ | ||
|
|
||
| ss = StringSubber(env, mode, conv, gvars) | ||
| result = ss.substitute(strSubst, lvars) | ||
|
|
||
| try: | ||
| del gvars['__builtins__'] | ||
| except KeyError: | ||
| pass | ||
| result = ss.substitute(strSubst, lvars) | ||
| finally: | ||
| try: | ||
| del gvars['__builtins__'] | ||
| except KeyError: | ||
| pass | ||
|
|
||
| res = result | ||
| if is_String(result): | ||
|
|
@@ -902,14 +956,19 @@ def scons_subst(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars={ | |
|
|
||
| return result | ||
|
|
||
| def scons_subst_list(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars={}, lvars={}, conv=None, overrides: dict | None = None): | ||
| def scons_subst_list(strSubst, env, mode=SUBST_RAW, target=None, source=None, gvars=None, lvars=None, conv=None, overrides: dict | None = None): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comments as for |
||
| """Substitute construction variables in a string (or list or other | ||
| object) and separate the arguments into a command list. | ||
|
|
||
| The companion scons_subst() function (above) handles basic | ||
| substitutions within strings, so see that function instead | ||
| if that's what you're looking for. | ||
| """ | ||
| if gvars is None: | ||
| gvars = {} | ||
| if lvars is None: | ||
| lvars = {} | ||
|
|
||
| if conv is None: | ||
| conv = _strconv[mode] | ||
|
|
||
|
|
@@ -928,25 +987,28 @@ def scons_subst_list(strSubst, env, mode=SUBST_RAW, target=None, source=None, gv | |
| lvars = lvars.copy() | ||
| lvars.update(d) | ||
|
|
||
| # Allow caller to specify last ditch override of lvars | ||
| # Allow caller to specify last ditch override of lvars; don't | ||
| # mutate the caller's dictionary doing so. | ||
| if overrides: | ||
| lvars.update(overrides) | ||
| lvars = {**lvars, **overrides} | ||
|
|
||
| # We're (most likely) going to eval() things. If Python doesn't | ||
| # find a __builtins__ value in the global dictionary used for eval(), | ||
| # it copies the current global values for you. Avoid this by | ||
| # setting it explicitly and then deleting, so we don't pollute the | ||
| # construction environment Dictionary(ies) that are typically used | ||
| # for expansion. | ||
| # for expansion. gvars is usually the live env dict, so make sure | ||
| # the deletion happens even if substitution raises. | ||
| gvars['__builtins__'] = __builtins__ | ||
|
|
||
| ls = ListSubber(env, mode, conv, gvars) | ||
| ls.substitute(strSubst, lvars, 0) | ||
|
|
||
| try: | ||
| del gvars['__builtins__'] | ||
| except KeyError: | ||
| pass | ||
| ls.substitute(strSubst, lvars, 0) | ||
| finally: | ||
| try: | ||
| del gvars['__builtins__'] | ||
| except KeyError: | ||
| pass | ||
|
|
||
| return ls.data | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so we just don't need the check for "attribute access"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The original logic was checking for { or . and then checking for { and then chopping the bookended {}'s off and doing nothing with the .
The . gets handled by not being in lvars or gvars and thus it gets eval'd
It's never used to shortcut checking lvars or gvars, so is kinda pointless here.