From 868ad1b3b5137b63f0f9a2919584513c28e9446d Mon Sep 17 00:00:00 2001 From: weiconghe <13976098570@163.com> Date: Mon, 15 Jun 2026 11:35:51 +0800 Subject: [PATCH] feat: support directory paths in get_symbols_overview with max_files safeguard (issue #1412) - Add directory support to GetSymbolsOverviewTool via _apply_directory, returning a per-file mapping of grouped symbols - Add max_files parameter (default 20) that raises ValueError when a directory contains more analyzable files than the limit, addressing maintainer feedback on issue #1412 about unbounded top-level overviews - Single-file behavior is unchanged - Tests: directory happy path, single-file regression, max_files guard --- src/serena/tools/symbol_tools.py | 66 +++++++++++++++++++++++++++++--- test/serena/test_serena_agent.py | 55 ++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 6 deletions(-) diff --git a/src/serena/tools/symbol_tools.py b/src/serena/tools/symbol_tools.py index 7c11fce4c..cee70f053 100644 --- a/src/serena/tools/symbol_tools.py +++ b/src/serena/tools/symbol_tools.py @@ -35,25 +35,38 @@ def apply(self) -> str: class GetSymbolsOverviewTool(Tool, ToolMarkerSymbolicRead): """ - Gets an overview of the top-level symbols defined in a given file. + Gets an overview of the top-level symbols defined in a given file or directory. """ symbol_dict_grouper = LanguageServerSymbolDictGrouper(["kind"], ["kind"], collapse_singleton=True) - def apply(self, relative_path: str, depth: int = 0, max_answer_chars: int = -1) -> str: + def apply(self, relative_path: str, depth: int = 0, max_answer_chars: int = -1, max_files: int = 20) -> str: """ - Use this tool to get a high-level understanding of the code symbols in a file. + Use this tool to get a high-level understanding of the code symbols in a file or directory. This should be the first tool to call when you want to understand a new file, unless you already know what you are looking for. + When given a directory path, returns top-level symbols for every analyzable file in the directory. - :param relative_path: the relative path to the file to get the overview of + :param relative_path: the relative path to the file or directory to get the overview of :param depth: depth up to which descendants of top-level symbols shall be retrieved (e.g. 1 retrieves immediate children). Default 0. :param max_answer_chars: if the overview is longer than this number of characters, no content will be returned. -1 means the default value from the config will be used. Don't adjust unless there is really no other way to get the content required for the task. + :param max_files: only used when relative_path is a directory. If the directory contains more + analyzable files than this limit, the tool raises ValueError instead of returning a partial + overview — narrow the path to a subdirectory, or learn the layout from memories first. + Default 20. Don't increase unless you really need a broad sweep and accept the token cost. :return: a JSON object containing symbols grouped by kind in a compact format. + For directories, returns a mapping of file paths to their grouped symbols. """ + file_path = os.path.join(self.project.project_root, relative_path) + if not os.path.exists(file_path): + raise FileNotFoundError(f"File or directory {relative_path} does not exist in the project.") + + if os.path.isdir(file_path): + return self._apply_directory(relative_path, depth=depth, max_answer_chars=max_answer_chars, max_files=max_files) + result = self.get_symbol_overview(relative_path, depth=depth) # capture kind names and depth-0 snapshots before grouping, which mutates the dicts @@ -82,6 +95,49 @@ def make_depth_0_result() -> str: return self._limit_length(result_json_str, max_answer_chars, shortened_result_factories=shortened_results) + def _apply_directory(self, relative_path: str, depth: int = 0, max_answer_chars: int = -1, max_files: int = 20) -> str: + symbol_retriever = self.create_language_server_symbol_retriever() + path_to_symbols = symbol_retriever.get_symbol_overview(relative_path) + + total_files = len(path_to_symbols) + if total_files > max_files: + sample = list(path_to_symbols.keys())[:5] + raise ValueError( + f"Directory {relative_path} contains {total_files} analyzable files, which exceeds " + f"max_files={max_files}. Narrow the path to a more specific subdirectory, or learn the " + f"repository layout from memories before asking for a broad overview. " + f"Sample files found: {sample}" + ) + + def child_inclusion_predicate(s: LanguageServerSymbol) -> bool: + return not s.is_low_level() + + per_file_result = {} + file_count = 0 + for file_rel_path, symbols in path_to_symbols.items(): + symbol_dicts = [] + for symbol in symbols: + symbol_dicts.append( + symbol.to_dict( + name_path=False, + name=True, + depth=depth, + kind=True, + relative_path=False, + location=False, + child_inclusion_predicate=child_inclusion_predicate, + ) + ) + per_file_result[file_rel_path] = self.symbol_dict_grouper.group(symbol_dicts) + file_count += 1 + + result_json_str = self._to_json(per_file_result) + + def make_file_counts() -> str: + return f"Analyzed {file_count} files in directory {relative_path}" + + return self._limit_length(result_json_str, max_answer_chars, shortened_result_factories=[make_file_counts]) + def get_symbol_overview(self, relative_path: str, depth: int = 0) -> list[LanguageServerSymbol.OutputDict]: """ :param relative_path: relative path to a source file @@ -90,8 +146,6 @@ def get_symbol_overview(self, relative_path: str, depth: int = 0) -> list[Langua """ symbol_retriever = self.create_language_server_symbol_retriever() - # The symbol overview is capable of working with both files and directories, - # but we want to ensure that the user provides a file path. file_path = os.path.join(self.project.project_root, relative_path) if not os.path.exists(file_path): raise FileNotFoundError(f"File or directory {relative_path} does not exist in the project.") diff --git a/test/serena/test_serena_agent.py b/test/serena/test_serena_agent.py index c1e2578f9..8405c5d6c 100644 --- a/test/serena/test_serena_agent.py +++ b/test/serena/test_serena_agent.py @@ -24,6 +24,7 @@ FindReferencingSymbolsTool, FindSymbolTool, GetDiagnosticsForFileTool, + GetSymbolsOverviewTool, InitialInstructionsTool, ReplaceContentTool, ReplaceSymbolBodyTool, @@ -1276,6 +1277,60 @@ def test_safe_delete_symbol_succeeds_when_no_references(self, serena_agent: Sere f"Expected symbol {case.name_path} to be removed from {case.relative_path}, but it still appears in the file content" ) + @pytest.mark.parametrize( + "serena_agent", + [ + pytest.param(Language.PYTHON, marks=get_pytest_markers(Language.PYTHON), id="python_directory_overview"), + ], + indirect=["serena_agent"], + ) + def test_get_symbols_overview_directory_returns_per_file_symbols(self, serena_agent: SerenaAgent): + """ + Tests that get_symbols_overview accepts a directory path and returns + symbols grouped by file (Issue #1412). + """ + overview_tool = serena_agent.get_tool(GetSymbolsOverviewTool) + result = overview_tool.apply(relative_path="test_repo", depth=0) + result_dict = json.loads(result) + assert isinstance(result_dict, dict), f"Expected dict result for directory, got: {type(result_dict)}" + assert len(result_dict) > 0, "Expected at least one file in directory overview" + for file_path in result_dict: + assert file_path.endswith(".py"), f"Expected Python file path, got: {file_path}" + + @pytest.mark.parametrize( + "serena_agent", + [ + pytest.param(Language.PYTHON, marks=get_pytest_markers(Language.PYTHON), id="python_file_overview_unchanged"), + ], + indirect=["serena_agent"], + ) + def test_get_symbols_overview_file_returns_same_format(self, serena_agent: SerenaAgent): + """ + Regression test: get_symbols_overview with a file path should return + the same grouped format as before (list of symbol dicts by kind). + """ + overview_tool = serena_agent.get_tool(GetSymbolsOverviewTool) + result = overview_tool.apply(relative_path="test_repo/services.py", depth=0) + result_dict = json.loads(result) + assert isinstance(result_dict, dict), f"Expected dict result, got: {type(result_dict)}" + assert "test_repo/services.py" not in result_dict, "Single file result should not be wrapped in per-file mapping" + + @pytest.mark.parametrize( + "serena_agent", + [ + pytest.param(Language.PYTHON, marks=get_pytest_markers(Language.PYTHON), id="python_directory_exceeds_max_files"), + ], + indirect=["serena_agent"], + ) + def test_get_symbols_overview_directory_raises_when_exceeds_max_files(self, serena_agent: SerenaAgent): + """ + Tests that giving a directory with more analyzable files than max_files + raises ValueError with guidance to narrow the path (Issue #1412 maintainer feedback). + """ + overview_tool = serena_agent.get_tool(GetSymbolsOverviewTool) + with pytest.raises(ValueError, match="max_files=1"): + overview_tool.apply(relative_path="test_repo", depth=0, max_files=1) + class TestPromptProvision: class MockContext: