From 527282467c583b3abd17614dfd30d10c80fb7e5c Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 20 Jun 2026 13:44:13 +0800 Subject: [PATCH] Add mechanical stuck-loop guard for agent loops --- README.md | 7 + README/README_zh-CN.md | 7 + README/README_zh-TW.md | 7 + .../Eng/doc/new_features/v46_features_doc.rst | 52 +++++++ docs/source/Eng/eng_index.rst | 1 + .../Zh/doc/new_features/v46_features_doc.rst | 48 +++++++ docs/source/Zh/zh_index.rst | 1 + je_auto_control/__init__.py | 5 + .../gui/script_builder/command_schema.py | 15 ++ .../utils/executor/action_executor.py | 18 +++ je_auto_control/utils/loop_guard/__init__.py | 8 ++ .../utils/loop_guard/loop_guard.py | 132 ++++++++++++++++++ .../utils/mcp_server/tools/_factories.py | 27 +++- .../utils/mcp_server/tools/_handlers.py | 13 ++ .../headless/test_loop_guard_batch.py | 96 +++++++++++++ 15 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 docs/source/Eng/doc/new_features/v46_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v46_features_doc.rst create mode 100644 je_auto_control/utils/loop_guard/__init__.py create mode 100644 je_auto_control/utils/loop_guard/loop_guard.py create mode 100644 test/unit_test/headless/test_loop_guard_batch.py diff --git a/README.md b/README.md index e91e379d..59769494 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ ## Table of Contents +- [What's new (2026-06-20) — Stuck-Loop Guard (Agent Loop Progress Detection)](#whats-new-2026-06-20--stuck-loop-guard-agent-loop-progress-detection) - [What's new (2026-06-20) — Coordinate-Space Mapping (Model Grid ⇄ Physical Pixels)](#whats-new-2026-06-20--coordinate-space-mapping-model-grid--physical-pixels) - [What's new (2026-06-20) — Voice-Command Router](#whats-new-2026-06-20--voice-command-router) - [What's new (2026-06-20) — Locale-Aware Number, Currency & Date Parsing](#whats-new-2026-06-20--locale-aware-number-currency--date-parsing) @@ -98,6 +99,12 @@ --- +## What's new (2026-06-20) — 
Stuck-Loop Guard (Agent Loop Progress Detection) + +Catch agents stuck in no-progress loops. Full reference: [`docs/source/Eng/doc/new_features/v46_features_doc.rst`](docs/source/Eng/doc/new_features/v46_features_doc.rst). + +- **`LoopGuard` / `digest_result`** (`AC_loop_guard_observe` / `AC_loop_guard_reset`, `ac_*`): the top computer-use failure mode is an agent repeating an action with no effect — and the model can't see its own loop. `LoopGuard` watches the `(tool, args, result)` stream and flags `repeat` (same call N times), `ping_pong` (A-B-A-B), and `no_op` (observation digest unchanged), escalating `ok`→`warn`→`critical` by run length. Complements the step/time budget and offline trajectory eval; pure-stdlib, deterministic. + ## What's new (2026-06-20) — Coordinate-Space Mapping (Model Grid ⇄ Physical Pixels) Translate computer-use model clicks to real pixels. Full reference: [`docs/source/Eng/doc/new_features/v45_features_doc.rst`](docs/source/Eng/doc/new_features/v45_features_doc.rst). 
diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md index cc053d9f..2895fcaf 100644 --- a/README/README_zh-CN.md +++ b/README/README_zh-CN.md @@ -12,6 +12,7 @@ ## 目录 +- [本次更新 (2026-06-20) — 卡循环守卫(Agent Loop 进度检测)](#本次更新-2026-06-20--卡循环守卫agent-loop-进度检测) - [本次更新 (2026-06-20) — 坐标空间映射(模型网格 ⇄ 物理像素)](#本次更新-2026-06-20--坐标空间映射模型网格--物理像素) - [本次更新 (2026-06-20) — 语音指令路由器](#本次更新-2026-06-20--语音指令路由器) - [本次更新 (2026-06-20) — 区域设置感知的数字、货币与日期解析](#本次更新-2026-06-20--区域设置感知的数字货币与日期解析) @@ -97,6 +98,12 @@ --- +## 本次更新 (2026-06-20) — 卡循环守卫(Agent Loop 进度检测) + +捕捉卡在无进展循环的 agent。完整参考:[`docs/source/Zh/doc/new_features/v46_features_doc.rst`](../docs/source/Zh/doc/new_features/v46_features_doc.rst)。 + +- **`LoopGuard` / `digest_result`**(`AC_loop_guard_observe` / `AC_loop_guard_reset`、`ac_*`):电脑操作最主要的失败模式是 agent 重复一个无效果的动作 —— 而模型看不到自己的循环。`LoopGuard` 观察 `(tool, args, result)` 流并标记 `repeat`(相同调用 N 次)、`ping_pong`(A-B-A-B)与 `no_op`(观察摘要不变),依执行长度由 `ok`→`warn`→`critical` 升级。与步数/时间预算及离线轨迹评估互补;纯标准库、具确定性。 + ## 本次更新 (2026-06-20) — 坐标空间映射(模型网格 ⇄ 物理像素) 将电脑操作模型的点击转成物理像素。完整参考:[`docs/source/Zh/doc/new_features/v45_features_doc.rst`](../docs/source/Zh/doc/new_features/v45_features_doc.rst)。 diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md index c4578c07..8b758554 100644 --- a/README/README_zh-TW.md +++ b/README/README_zh-TW.md @@ -12,6 +12,7 @@ ## 目錄 +- [本次更新 (2026-06-20) — 卡迴圈守衛(Agent Loop 進度偵測)](#本次更新-2026-06-20--卡迴圈守衛agent-loop-進度偵測) - [本次更新 (2026-06-20) — 座標空間對映(模型網格 ⇄ 實體像素)](#本次更新-2026-06-20--座標空間對映模型網格--實體像素) - [本次更新 (2026-06-20) — 語音指令路由器](#本次更新-2026-06-20--語音指令路由器) - [本次更新 (2026-06-20) — 區域設定感知的數字、貨幣與日期解析](#本次更新-2026-06-20--區域設定感知的數字貨幣與日期解析) @@ -97,6 +98,12 @@ --- +## 本次更新 (2026-06-20) — 卡迴圈守衛(Agent Loop 進度偵測) + +捕捉卡在無進展迴圈的 agent。完整參考:[`docs/source/Zh/doc/new_features/v46_features_doc.rst`](../docs/source/Zh/doc/new_features/v46_features_doc.rst)。 + +- **`LoopGuard` / `digest_result`**(`AC_loop_guard_observe` / `AC_loop_guard_reset`、`ac_*`):電腦操作最主要的失敗模式是 agent 重複一個無效果的動作 —— 
而模型看不到自己的迴圈。`LoopGuard` 觀察 `(tool, args, result)` 串流並標記 `repeat`(相同呼叫 N 次)、`ping_pong`(A-B-A-B)與 `no_op`(觀察摘要不變),依執行長度由 `ok`→`warn`→`critical` 升級。與步數/時間預算及離線軌跡評估互補;純標準函式庫、具確定性。 + ## 本次更新 (2026-06-20) — 座標空間對映(模型網格 ⇄ 實體像素) 將電腦操作模型的點擊轉成真實像素。完整參考:[`docs/source/Zh/doc/new_features/v45_features_doc.rst`](../docs/source/Zh/doc/new_features/v45_features_doc.rst)。 diff --git a/docs/source/Eng/doc/new_features/v46_features_doc.rst b/docs/source/Eng/doc/new_features/v46_features_doc.rst new file mode 100644 index 00000000..3ce60b97 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v46_features_doc.rst @@ -0,0 +1,52 @@ +Stuck-Loop Guard (Agent Loop Progress Detection) +================================================ + +The dominant computer-use failure mode is an agent burning its budget repeating +an action that has no effect — and the model usually can't see its own loop, so +it must be caught **mechanically** from outside, by watching the stream of +``(tool, args, result)`` triples. ``LoopGuard`` flags three patterns: + +- ``repeat`` — the same ``(tool, args)`` fired many times in a row; +- ``ping_pong`` — two actions alternating A-B-A-B with no progress; +- ``no_op`` — the observation (a screenshot/state digest) never changes. + +It complements a step/time budget (which can't tell a productive loop from a +stuck one) and the offline trajectory evaluator. Pure standard library +(``collections`` + ``hashlib``), deterministic; imports no ``PySide6``. + +Headless API +------------ + +.. 
code-block:: python + + from je_auto_control import LoopGuard, digest_result + + guard = LoopGuard(warn=8, critical=15) + for step in agent_steps: + verdict = guard.observe(step.tool, step.args, + digest_result(step.screenshot)) + if verdict.level == "critical": + break # abort: stuck loop + if verdict.level == "warn": + nudge_the_model(verdict.pattern) + +``observe`` returns a ``LoopVerdict`` with ``pattern`` (``None`` if nothing is detected), +``level`` (``ok`` until the run length reaches the ``warn`` / ``critical`` threshold) and +``count`` (the length of the detected run). ``digest_result`` makes a stable short hash of a +screenshot/observation (bytes or any JSON-able value). ``reset`` clears history. + +Executor commands +----------------- + +A module-level default guard backs the executor/MCP surfaces so a flow can track +progress across steps: + +================================ =================================================== +Command Effect +================================ =================================================== +``AC_loop_guard_observe`` Feed a step; returns ``{pattern, level, count}``. +``AC_loop_guard_reset`` Clear the default guard's history. +================================ =================================================== + +The same operations are exposed as MCP tools (``ac_loop_guard_observe`` / +``ac_loop_guard_reset``) and as Script Builder commands under **Agent**. diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst index 35514b17..78f667f4 100644 --- a/docs/source/Eng/eng_index.rst +++ b/docs/source/Eng/eng_index.rst @@ -68,6 +68,7 @@ Comprehensive guides for all AutoControl features. 
doc/new_features/v43_features_doc doc/new_features/v44_features_doc doc/new_features/v45_features_doc + doc/new_features/v46_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/docs/source/Zh/doc/new_features/v46_features_doc.rst b/docs/source/Zh/doc/new_features/v46_features_doc.rst new file mode 100644 index 00000000..aa74ba57 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v46_features_doc.rst @@ -0,0 +1,48 @@ +卡迴圈守衛(Agent Loop 進度偵測) +================================ + +電腦操作最主要的失敗模式,是 agent 不斷重複一個沒有效果的動作而耗盡預算 —— 而模型通常 +看不到自己的迴圈,因此必須從外部以**機械方式**偵測,藉由觀察 ``(tool, args, result)`` +三元組串流。``LoopGuard`` 會標記三種模式: + +- ``repeat`` —— 相同的 ``(tool, args)`` 連續觸發多次; +- ``ping_pong`` —— 兩個動作以 A-B-A-B 交替而毫無進展; +- ``no_op`` —— 觀察結果(截圖/狀態摘要)從未改變。 + +它與步數/時間預算(無法分辨有進展的迴圈與卡住的迴圈)以及離線軌跡評估互補。純標準函式 +庫(``collections`` + ``hashlib``)、具確定性;不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import LoopGuard, digest_result + + guard = LoopGuard(warn=8, critical=15) + for step in agent_steps: + verdict = guard.observe(step.tool, step.args, + digest_result(step.screenshot)) + if verdict.level == "critical": + break # 中止:卡住的迴圈 + if verdict.level == "warn": + nudge_the_model(verdict.pattern) + +``observe`` 回傳 ``LoopVerdict``,含 ``pattern``(未偵測到時為 ``None``)、``level``(執行長度達到 +``warn`` / ``critical`` 門檻前為 ``ok``)與 ``count``(偵測到的執行長度)。``digest_result`` 為截 +圖/觀察結果(位元組或任何可 JSON 化的值)產生穩定的短雜湊。``reset`` 清除歷史。 + +執行器指令 +---------- + +模組層級的預設守衛支撐 executor/MCP 介面,讓流程可跨步驟追蹤進度: + +================================ =================================================== +指令 效果 +================================ =================================================== +``AC_loop_guard_observe`` 餵入一步;回傳 ``{pattern, level, count}``。 +``AC_loop_guard_reset`` 清除預設守衛的歷史。 +================================ =================================================== + +相同操作亦提供為 MCP 工具(``ac_loop_guard_observe`` / 
``ac_loop_guard_reset``),以及 +Script Builder 中 **Agent** 分類下的指令。 diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst index 668fdc02..566b1bd0 100644 --- a/docs/source/Zh/zh_index.rst +++ b/docs/source/Zh/zh_index.rst @@ -68,6 +68,7 @@ AutoControl 所有功能的完整使用指南。 doc/new_features/v43_features_doc doc/new_features/v44_features_doc doc/new_features/v45_features_doc + doc/new_features/v46_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 9a565936..668777cf 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -255,6 +255,10 @@ from je_auto_control.utils.coordinate_space import ( CoordinateSpace, downscale_png, normalized_space, xga_space, ) +# Mechanical stuck-loop detection for agent loops +from je_auto_control.utils.loop_guard import ( + LoopGuard, LoopVerdict, default_loop_guard, digest_result, +) # Background popup/interrupt watchdog (unattended automation) from je_auto_control.utils.watchdog import ( PopupWatchdog, WatchdogRule, default_popup_watchdog, @@ -710,6 +714,7 @@ def start_autocontrol_gui(*args, **kwargs): "parse_number", "VoiceCommand", "VoiceRouter", "default_voice_router", "CoordinateSpace", "downscale_png", "normalized_space", "xga_space", + "LoopGuard", "LoopVerdict", "default_loop_guard", "digest_result", # MCP server "AuditLogger", "HttpMCPServer", "MCPContent", "MCPPrompt", "MCPPromptArgument", "MCPResource", "MCPServer", "MCPTool", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 8d70be51..9893bb2f 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -1041,6 +1041,21 @@ def _add_misc_specs(specs: List[CommandSpec]) -> None: ), description="Map a physical-pixel coordinate to a model grid.", )) + 
def _decode_loop_guard_args(args: Any) -> Any:
    """Return ``args`` with JSON text decoded into the value it encodes.

    The Script Builder exposes ``args`` for ``AC_loop_guard_observe`` as a
    string field whose placeholder is a JSON object, while the MCP tool schema
    declares ``args`` as an object. Decoding JSON text here lets both surfaces
    produce the same key for the same step, and keeps key order or whitespace
    in the text from hiding a repeat.

    :param args: the raw ``args`` value handed to the executor command.
    :return: the decoded value when ``args`` is a string holding valid JSON;
        otherwise ``args`` unchanged (non-strings, empty or free-form text).
    """
    if not isinstance(args, str):
        return args
    # Local import keeps this adapter independent of the module's import list.
    import json
    try:
        return json.loads(args)
    except ValueError:  # json.JSONDecodeError subclasses ValueError
        return args


def _loop_guard_observe(tool: str, args: Any = None,
                        result_digest: str = "") -> Dict[str, Any]:
    """Adapter: feed a step to the default loop guard; report the verdict.

    :param tool: name of the tool/command the agent just ran.
    :param args: that call's arguments; JSON text is decoded first.
    :param result_digest: optional digest of the resulting observation.
    :return: ``{"pattern": ..., "level": ..., "count": ...}`` copied from the
        guard's verdict.
    """
    from je_auto_control.utils.loop_guard import default_loop_guard
    verdict = default_loop_guard.observe(
        tool, _decode_loop_guard_args(args), result_digest)
    return {"pattern": verdict.pattern, "level": verdict.level,
            "count": verdict.count}


def _loop_guard_reset() -> Dict[str, Any]:
    """Adapter: clear the default loop guard's history.

    :return: ``{"reset": True}`` once the history has been cleared.
    """
    from je_auto_control.utils.loop_guard import default_loop_guard
    default_loop_guard.reset()
    return {"reset": True}
"AC_a11y_record_events": _a11y_record_events, diff --git a/je_auto_control/utils/loop_guard/__init__.py b/je_auto_control/utils/loop_guard/__init__.py new file mode 100644 index 00000000..8c06698e --- /dev/null +++ b/je_auto_control/utils/loop_guard/__init__.py @@ -0,0 +1,8 @@ +"""Mechanical stuck-loop detection for agent loops.""" +from je_auto_control.utils.loop_guard.loop_guard import ( + LoopGuard, LoopVerdict, default_loop_guard, digest_result, +) + +__all__ = [ + "LoopGuard", "LoopVerdict", "default_loop_guard", "digest_result", +] diff --git a/je_auto_control/utils/loop_guard/loop_guard.py b/je_auto_control/utils/loop_guard/loop_guard.py new file mode 100644 index 00000000..d139b0b2 --- /dev/null +++ b/je_auto_control/utils/loop_guard/loop_guard.py @@ -0,0 +1,132 @@ +"""Detect when an agent loop is stuck, from outside the model. + +The dominant computer-use failure mode is an agent burning budget repeating an +action that has no effect — the model usually can't see its own loop, so it must +be caught mechanically by watching the stream of ``(tool, args, result)`` triples. +``LoopGuard`` flags three patterns: + +* ``repeat`` — the same ``(tool, args)`` fired many times in a row; +* ``ping_pong`` — two actions alternating A-B-A-B with no progress; +* ``no_op`` — the observation (a screenshot/state digest) never changes. + +It complements a step/time budget (which can't tell a productive loop from a +stuck one) and the offline trajectory evaluator. Pure standard library +(``collections`` + ``hashlib``); deterministic; imports no ``PySide6``. 
import hashlib
import json
from collections import deque
from dataclasses import dataclass
from typing import Any, Deque, List, Optional, Tuple

LEVEL_OK = "ok"
LEVEL_WARN = "warn"
LEVEL_CRITICAL = "critical"

# Rank of each level, used to compare candidate verdicts by severity.
_SEVERITY = {LEVEL_OK: 0, LEVEL_WARN: 1, LEVEL_CRITICAL: 2}

# Shortest trailing run that counts as each pattern.
_MIN_REPEAT = 2
_MIN_PING_PONG = 4
_MIN_NO_OP = 2


@dataclass(frozen=True)
class LoopVerdict:
    """The result of observing one step."""

    pattern: Optional[str]  # None | "repeat" | "ping_pong" | "no_op"
    level: str              # "ok" | "warn" | "critical"
    count: int              # length of the detected run (0 when no pattern)


def _args_key(args: Any) -> str:
    """Return a deterministic text key for ``args``.

    JSON with sorted keys is used so that dicts differing only in key order
    map to the same key; values JSON cannot encode are passed through
    ``str``. If encoding still fails, ``repr(args)`` is the fallback.
    """
    try:
        return json.dumps(args, sort_keys=True, default=str)
    except (TypeError, ValueError):
        return repr(args)


def digest_result(obj: Any) -> str:
    """Return a stable short digest of a result/observation (e.g. a frame).

    :param obj: raw ``bytes``/``bytearray`` (hashed as-is) or any other value
        (hashed via its ``_args_key`` text, UTF-8 encoded).
    :return: the first 16 hex characters of the SHA-256 digest.
    """
    if isinstance(obj, (bytes, bytearray)):
        data = bytes(obj)
    else:
        data = _args_key(obj).encode("utf-8")
    return hashlib.sha256(data).hexdigest()[:16]


class LoopGuard:
    """Watches a stream of action triples and flags stuck-loop patterns.

    Each observed step is stored as ``(action key, result digest)`` in a
    bounded history. After every step the trailing run of each pattern is
    measured:

    * ``repeat`` - the same action key at least 2 times in a row;
    * ``ping_pong`` - two distinct action keys alternating for at least
      4 steps;
    * ``no_op`` - the same non-empty result digest at least 2 times in a row.

    Runs are measured inside the last ``window`` steps only, so a threshold
    larger than ``window`` can never be reached.
    """

    def __init__(self, *, warn: int = 8, critical: int = 15,
                 window: int = 20) -> None:
        """``warn``/``critical`` are run-length thresholds; ``window`` caps memory."""
        self._warn = warn
        self._critical = critical
        self._events: Deque[Tuple[str, str]] = deque(maxlen=window)

    def reset(self) -> None:
        """Forget all observed steps."""
        self._events.clear()

    def observe(self, tool: str, args: Any = None,
                result_digest: str = "") -> LoopVerdict:
        """Record a step and return the strongest stuck-loop verdict.

        :param tool: name of the tool/command that was run.
        :param args: its arguments (any value; keyed via ``_args_key``).
        :param result_digest: digest of the observation after the step; an
            empty string means "no observation" and never counts as ``no_op``.
        :return: the verdict with the highest level among the detected
            patterns, or ``LoopVerdict(None, "ok", 0)`` when none is detected.
        """
        self._events.append((f"{tool}:{_args_key(args)}", result_digest))
        pattern, count = self._classify()
        return LoopVerdict(pattern, self._level(pattern, count), count)

    def _classify(self) -> Tuple[Optional[str], int]:
        """Return ``(pattern, run length)`` of the most severe detected pattern.

        Every pattern is measured, so a short ``repeat`` cannot hide a long
        ``no_op`` run that has already crossed a threshold. When several
        patterns share the same level, the earlier one in the order
        repeat, ping_pong, no_op wins.
        """
        candidates = (
            ("repeat", self._trailing_repeat(), _MIN_REPEAT),
            ("ping_pong", self._trailing_ping_pong(), _MIN_PING_PONG),
            ("no_op", self._trailing_no_op(), _MIN_NO_OP),
        )
        best: Tuple[Optional[str], int] = (None, 0)
        best_rank = -1
        for pattern, count, minimum in candidates:
            if count < minimum:
                continue
            rank = _SEVERITY[self._level(pattern, count)]
            # Strictly greater: ties keep the higher-priority pattern.
            if rank > best_rank:
                best, best_rank = (pattern, count), rank
        return best

    @staticmethod
    def _run_length(values: List[str]) -> int:
        """Count how many trailing items of ``values`` equal its last item."""
        if not values:
            return 0
        last = values[-1]
        count = 0
        for value in reversed(values):
            if value != last:
                break
            count += 1
        return count

    def _trailing_repeat(self) -> int:
        """Length of the trailing run of identical action keys."""
        return self._run_length([event[0] for event in self._events])

    def _trailing_ping_pong(self) -> int:
        """Length of the trailing A-B-A-B alternation of two distinct keys."""
        keys = [event[0] for event in self._events]
        if len(keys) < 4 or keys[-1] == keys[-2]:
            return 0
        first, second = keys[-2], keys[-1]
        count = 0
        # Walking backwards, even offsets must match the last key and odd
        # offsets the one before it.
        for index, key in enumerate(reversed(keys)):
            expected = second if index % 2 == 0 else first
            if key != expected:
                break
            count += 1
        return count

    def _trailing_no_op(self) -> int:
        """Length of the trailing run of identical, non-empty result digests."""
        digests = [event[1] for event in self._events]
        if not digests or not digests[-1]:
            return 0
        return self._run_length(digests)

    def _level(self, pattern: Optional[str], count: int) -> str:
        """Map a detected run to ``ok`` / ``warn`` / ``critical``."""
        if pattern is None:
            return LEVEL_OK
        if count >= self._critical:
            return LEVEL_CRITICAL
        if count >= self._warn:
            return LEVEL_WARN
        return LEVEL_OK


default_loop_guard = LoopGuard()
def loop_guard_observe(tool, args=None, result_digest=""):
    """Feed one agent step to the default loop guard and return its verdict.

    The verdict's ``pattern``, ``level`` and ``count`` attributes are copied
    into a plain dict, in that order.
    """
    from je_auto_control.utils.loop_guard import default_loop_guard
    outcome = default_loop_guard.observe(tool, args, result_digest)
    return {field: getattr(outcome, field)
            for field in ("pattern", "level", "count")}


def loop_guard_reset():
    """Clear the default loop guard's history and acknowledge with a dict."""
    from je_auto_control.utils.loop_guard import default_loop_guard
    acknowledgement = {"reset": True}
    default_loop_guard.reset()
    return acknowledgement
"""Headless tests for the mechanical stuck-loop guard. Fully deterministic —
synthetic step sequences, no screenshots. Pure stdlib, no Qt imports."""
import je_auto_control as ac
from je_auto_control.utils.loop_guard import (
    LoopGuard, default_loop_guard, digest_result)


def test_repeat_escalates_ok_warn_critical():
    """Six identical steps walk through ok, ok, warn, warn, critical, critical."""
    guard = LoopGuard(warn=3, critical=5)
    seen = []
    for _ in range(6):
        seen.append(guard.observe("AC_click", {"x": 1}).level)
    assert seen == ["ok", "ok", "warn", "warn", "critical", "critical"]


def test_repeat_pattern_and_count():
    """The second identical step is reported as a repeat of length 2."""
    guard = LoopGuard(warn=3, critical=5)
    click = ("AC_click", {"x": 1})
    guard.observe(*click)
    outcome = guard.observe(*click)
    assert outcome.pattern == "repeat"
    assert outcome.count == 2


def test_distinct_args_not_a_repeat():
    """Same tool with different args is not a repeat."""
    guard = LoopGuard()
    guard.observe("AC_click", {"x": 1})
    outcome = guard.observe("AC_click", {"x": 2})
    assert outcome.pattern is None
    assert outcome.level == "ok"


def test_ping_pong_detected():
    """A-B alternation over six steps is flagged as ping_pong."""
    guard = LoopGuard(warn=4, critical=8)
    outcome = None
    for name in "ABABAB":
        outcome = guard.observe(name, None)
    assert outcome.pattern == "ping_pong"
    assert outcome.count >= 4


def test_no_op_when_observation_unchanged():
    """Different actions with an unchanged digest are flagged as no_op."""
    guard = LoopGuard(warn=3, critical=5)
    outcome = None
    for name in ("A", "B", "C"):
        outcome = guard.observe(name, None, result_digest="same")
    assert outcome.pattern == "no_op"
    assert outcome.count == 3


def test_reset_clears_history():
    """After reset a single step is no longer part of any run."""
    guard = LoopGuard(warn=2, critical=3)
    for _ in range(2):
        guard.observe("A", {"x": 1})
    guard.reset()
    outcome = guard.observe("A", {"x": 1})
    assert outcome.pattern is None
    assert outcome.level == "ok"


def test_digest_result_stable_and_bytes():
    """Digests are repeatable and differ for different bytes."""
    first, second = digest_result(b"abc"), digest_result(b"abc")
    assert first == second
    assert digest_result({"a": 1}) == digest_result({"a": 1})
    assert first != digest_result(b"abd")


# --- wiring ---------------------------------------------------------------

def test_executor_round_trip():
    """Two identical steps through the executor yield a repeat of length 2."""

    def step():
        return [["AC_loop_guard_observe",
                 {"tool": "AC_click", "args": {"x": 1}}]]

    default_loop_guard.reset()
    try:
        ac.execute_action(step())
        record = ac.execute_action(step())
        outcome = next(filter(lambda v: isinstance(v, dict), record.values()))
        assert outcome["pattern"] == "repeat"
        assert outcome["count"] == 2
    finally:
        default_loop_guard.reset()


def test_wiring():
    """Executor, MCP registry and Script Builder all expose the commands."""
    executor_names = {"AC_loop_guard_observe", "AC_loop_guard_reset"}
    mcp_names = {"ac_loop_guard_observe", "ac_loop_guard_reset"}

    assert executor_names <= ac.executor.known_commands()

    from je_auto_control.utils.mcp_server.tools import (
        build_default_tool_registry)
    registered = {tool.name for tool in build_default_tool_registry()}
    assert mcp_names <= registered

    from je_auto_control.gui.script_builder.command_schema import _build_specs
    builder_commands = {spec.command for spec in _build_specs()}
    assert executor_names <= builder_commands


def test_facade_exports():
    """The package facade exports every public loop-guard name."""
    expected = ("LoopGuard", "LoopVerdict", "default_loop_guard",
                "digest_result")
    assert not [name for name in expected if not hasattr(ac, name)]
    assert not [name for name in expected if name not in ac.__all__]