From 4072967b538a1fc239a90d588ea6cba40138181f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:47:48 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20Dictionary=20Loo?= =?UTF-8?q?kups=20in=20Semantic=20Registry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- .jules/bolt.md | 6 +++++- src/codeweaver/semantic/registry.py | 33 ++++++++++++----------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 7edb3f3bf..436837863 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,7 +13,7 @@ SPDX-License-Identifier: MIT OR Apache-2.0 # 2026-03-29 - Consider Readability and Possible Environment Limitations **Learning** While some patterns are hypothetically faster, they may not improve performance in i/o bound contexts. Examples include embedding/reranking requests and database operations where the dominant limiting factors are i/o constraints. -**Action** Don't recommend changes that reduce readability or diverge from Python idioms for no or marginal gains in performance. +**Action** Don't recommend changes that reduce readability or diverge from Python idioms for no or marginal gains in performance. ## 2026-04-01 - Fast generation of line pos lengths in Chunker with itertools **Learning:** itertools.accumulate(map(len, lines)) is significantly faster (~2-3x) than using a generator expression like (line_offsets[-1] + len(line) for line in lines) because it pushes the entire loop down to C level instead of creating generator overhead for each element. 
@@ -25,3 +25,7 @@ SPDX-License-Identifier: MIT OR Apache-2.0 ## 2025-04-12 - Walrus Operator Optimization **Learning:** Using the walrus operator inside a list comprehension to avoid redundant execution of string methods (like `.strip()`) is an effective and safe micro-optimization. The result of the assignment inside the list comprehension will intentionally leak into the scope of the caller function, but this standard Python behavior does not cause naming conflicts in non-recursive or non-global scopes. **Action:** Always favor using the walrus operator `:=` in list comprehensions or conditionals when identical string manipulations (e.g., `.strip()`) or expensive evaluation calls appear repeatedly within the identical expression branch. + +## 2025-05-15 - Avoiding Generator Expressions for Dictionary Value Lookups +**Learning:** Using `next((v for content in dict.values() for k, v in content.items() if k == target), default)` to find a key in a dictionary of dictionaries is a performance regression in hot paths. Instead of one $O(1)$ hash lookup per inner dictionary, it performs a linear scan that compares `target` against every key of every inner dictionary (cost proportional to the total number of entries), and it pays generator-frame overhead on top, bypassing the hash map entirely. +**Action:** Replace such generator expressions with a simple `for` loop over the inner dictionaries that uses a direct `in` check and an early return/yield (`if target in content: return content[target]`). The cost then scales with the number of inner dictionaries rather than the total number of entries, and the generator overhead disappears. 
diff --git a/src/codeweaver/semantic/registry.py b/src/codeweaver/semantic/registry.py index 37b77abe2..25e13ef28 100644 --- a/src/codeweaver/semantic/registry.py +++ b/src/codeweaver/semantic/registry.py @@ -344,17 +344,13 @@ def _get_direct_connections_by_source( """Get DirectConnections by their source Thing name across all languages.""" if language: yield from self.direct_connections[language].get(source, []) - yield from ( - next( - ( - conns - for content in self._direct_connections.values() - for con_name, conns in content.items() - if con_name == source - ), - [], - ) - ) + return + + # Optimization: Early return via direct lookup avoids O(N^2) generator overhead + for content in self._direct_connections.values(): + if source in content: + yield from content[source] + break def _get_positional_connections_by_source( self, source: ThingNameT, *, language: SemanticSearchLanguage | None = None @@ -362,15 +358,12 @@ def _get_positional_connections_by_source( """Get PositionalConnectionss by their source Thing name across all languages.""" if language: return self.positional_connections[language].get(source) - return next( - ( - conn - for content in self._positional_connections.values() - for con_name, conn in content.items() - if con_name == source - ), - None, - ) + + # Optimization: Early return via direct lookup avoids O(N^2) generator overhead + for content in self._positional_connections.values(): + if source in content: + return content[source] + return None def get_positional_connections_by_source( self, source: ThingNameT, *, language: SemanticSearchLanguage | None = None