From fde518cfd13c4b6531f1eea0da777dc1d8b9bd90 Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Wed, 17 Jun 2026 17:01:40 -0700
Subject: [PATCH 1/9] Add ask_vlm method for cloud VLM alert verification

Add Groundlight.ask_vlm(images, query, model_id) which verifies one or two
images against a natural-language query by calling POST /v1/vlm-queries.
Returns a VLMVerificationResult dataclass with verdict (YES/NO/UNSURE),
confidence, reasoning, and token cost.

- Accepts a single image or [full_frame, roi] for the dual-image strategy,
  reusing parse_supported_image_types for encoding.
- Moves the requests import to module level.
- Exports VLMVerificationResult from the package.
- Unit tests with mocked HTTP.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/groundlight/__init__.py |   2 +-
 src/groundlight/client.py   | 126 ++++++++++++++++++++++++++++++++++++
 test/unit/test_ask_vlm.py   | 111 +++++++++++++++++++++++++++++++
 3 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 test/unit/test_ask_vlm.py

diff --git a/src/groundlight/__init__.py b/src/groundlight/__init__.py
index 805fdd33..baf66fd3 100644
--- a/src/groundlight/__init__.py
+++ b/src/groundlight/__init__.py
@@ -7,7 +7,7 @@
 
 # Imports from our code
 from .client import Groundlight
-from .client import GroundlightClientError, ApiTokenError, EdgeNotAvailableError, NotFoundError
+from .client import GroundlightClientError, ApiTokenError, EdgeNotAvailableError, NotFoundError, VLMVerificationResult
 from .experimental_api import ExperimentalApi
 from .binary_labels import Label
 from .version import get_version
diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index edcb8771..d6550af2 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -3,10 +3,13 @@
 import os
 import time
 import warnings
+from dataclasses import dataclass
 from functools import partial
 from io import BufferedReader, BytesIO
 from typing import Any, Callable, List, Optional, Tuple, Union
 
+import requests
+
 from groundlight_openapi_client import Configuration
 from groundlight_openapi_client.api.detector_groups_api import DetectorGroupsApi
 from groundlight_openapi_client.api.detectors_api import DetectorsApi
@@ -73,6 +76,22 @@ class EdgeNotAvailableError(GroundlightClientError):
     """Raised when an edge-only method is called against a non-edge endpoint."""
 
 
+@dataclass
+class VLMVerificationResult:
+    """Result of a VLM-based alert verification via the Groundlight cloud API."""
+
+    id: str
+    query: str
+    model_id: str
+    verdict: str  # "YES" | "NO" | "UNSURE"
+    confidence: float  # 0.0–1.0
+    reasoning: str
+    created_at: str
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_cost_usd: Optional[float] = None
+
+
 class Groundlight:  # pylint: disable=too-many-instance-attributes,too-many-public-methods
     """
     Client for accessing the Groundlight cloud service. Provides methods to create visual detectors,
@@ -1089,6 +1108,113 @@ def ask_async(  # noqa: PLR0913 # pylint: disable=too-many-arguments
             inspection_id=inspection_id,
         )
 
+    def ask_vlm(
+        self,
+        images: Union[
+            "np.ndarray",
+            List["np.ndarray"],
+            str,
+            bytes,
+            "Image.Image",
+            BytesIO,
+            BufferedReader,
+        ],
+        query: str,
+        model_id: Optional[str] = None,
+        timeout: float = 15.0,
+    ) -> VLMVerificationResult:
+        """Verify one or two images against a natural-language query using a cloud VLM.
+
+        Calls the Groundlight ``POST /v1/vlm-queries`` endpoint.  The VLM runs in the
+        Groundlight cloud (AWS Bedrock) — no local inference.
+
+        **Example usage**::
+
+            gl = Groundlight()
+
+            # Single-image verification
+            result = gl.ask_vlm(image=frame, query="Is there a fire?")
+            if result.verdict == "YES":
+                emit_alert()
+
+            # Dual-image (full frame + ROI) for better context
+            result = gl.ask_vlm(
+                images=[full_frame, roi_crop],
+                query="Is there a fire in the highlighted region?",
+            )
+            print(result.confidence, result.reasoning)
+
+        :param images: One image or a list of up to two images.  When two images are
+            provided the first is treated as the **full camera frame** and the second
+            as the **cropped region of interest (ROI)**.  Accepted formats per image:
+
+            - filename (string) of a JPEG/PNG file
+            - raw bytes or BytesIO / BufferedReader
+            - numpy array (H, W, 3) in BGR order (OpenCV convention)
+            - PIL Image
+
+        :param query: Natural-language prompt describing what to verify, e.g.
+            ``"Is there a fire visible in the image? Reason step by step."``
+        :param model_id: AWS Bedrock model ID, e.g.
+            ``"us.anthropic.claude-sonnet-4-5-20250929-v1:0"``.
+            Defaults to the server-configured default.
+        :param timeout: Request timeout in seconds (default 15 s).
+
+        :return: :class:`VLMVerificationResult` with ``verdict`` (``"YES"`` / ``"NO"`` /
+            ``"UNSURE"``), ``confidence``, ``reasoning``, and token cost fields.
+        :raises requests.HTTPError: On non-2xx response from the server.
+        """
+        # Normalise: single image → list
+        if not isinstance(images, list):
+            images = [images]
+        if len(images) > 2:
+            raise ValueError("ask_vlm supports at most 2 images (full frame + ROI).")
+
+        # Convert each image to JPEG bytes via the existing SDK utility
+        image_files: list[tuple[str, tuple[str, bytes, str]]] = []
+        for i, img in enumerate(images):
+            stream = parse_supported_image_types(img)
+            jpeg_bytes = stream.read()
+            image_files.append(("images", (f"image_{i}.jpg", jpeg_bytes, "image/jpeg")))
+
+        params: dict[str, str] = {"query": query}
+        if model_id:
+            params["model_id"] = model_id
+
+        headers = {
+            "x-api-token": self.api_client.configuration.api_key["ApiToken"],
+            "X-Request-Id": f"ask_vlm_{int(time.time() * 1000)}",
+            "x-sdk-language": "python",
+        }
+
+        url = f"{self.endpoint}v1/vlm-queries"
+
+        resp = requests.post(
+            url,
+            params=params,
+            files=image_files,
+            headers=headers,
+            timeout=timeout,
+            verify=self.api_client.configuration.verify_ssl,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+
+        result_block = data.get("result", {})
+        cost_block = data.get("cost", {})
+        return VLMVerificationResult(
+            id=data.get("id", ""),
+            query=data.get("query", query),
+            model_id=data.get("model_id", model_id or ""),
+            verdict=result_block.get("verdict", "UNSURE"),
+            confidence=float(result_block.get("confidence", 0.0)),
+            reasoning=result_block.get("reasoning", ""),
+            created_at=data.get("created_at", ""),
+            input_tokens=cost_block.get("input_tokens"),
+            output_tokens=cost_block.get("output_tokens"),
+            total_cost_usd=cost_block.get("total_cost_usd"),
+        )
+
     def wait_for_confident_result(
         self,
         image_query: Union[ImageQuery, str],
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
new file mode 100644
index 00000000..911d4f4f
--- /dev/null
+++ b/test/unit/test_ask_vlm.py
@@ -0,0 +1,111 @@
+"""Unit tests for Groundlight.ask_vlm — mocks HTTP, no live server needed."""
+
+import json
+from io import BytesIO
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+import pytest
+
+from groundlight import Groundlight, VLMVerificationResult
+
+
+@pytest.fixture
+def gl(monkeypatch):
+    monkeypatch.setenv("GROUNDLIGHT_API_TOKEN", "api_fake_test_token")
+    # Avoid the live /v1/me connectivity check performed during __init__.
+    with patch.object(Groundlight, "_verify_connectivity", return_value=None):
+        client = Groundlight(endpoint="http://test-server/device-api/")
+    return client
+
+
+def _mock_response(verdict="YES", confidence=0.92, reasoning="Flames visible.", model_id="us.anthropic.claude-sonnet-4-5-20250929-v1:0"):
+    resp = MagicMock()
+    resp.status_code = 201
+    resp.json.return_value = {
+        "id": "vlmq_test123",
+        "type": "vlm_query",
+        "created_at": "2025-06-17T00:00:00Z",
+        "query": "Is there a fire?",
+        "model_id": model_id,
+        "result": {"verdict": verdict, "confidence": confidence, "reasoning": reasoning},
+        "cost": {"input_tokens": 400, "output_tokens": 80, "total_cost_usd": 0.0015},
+    }
+    resp.raise_for_status = MagicMock()
+    return resp
+
+
+class TestAskVlm:
+    @patch("groundlight.client.requests")
+    def test_returns_vlm_verification_result(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+
+        result = gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?")
+
+        assert isinstance(result, VLMVerificationResult)
+        assert result.verdict == "YES"
+        assert result.confidence == pytest.approx(0.92)
+        assert result.id == "vlmq_test123"
+        assert result.input_tokens == 400
+        assert result.total_cost_usd == pytest.approx(0.0015)
+
+    @patch("groundlight.client.requests")
+    def test_single_numpy_image_encoded_as_jpeg(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+        frame = np.zeros((480, 640, 3), dtype=np.uint8)
+
+        gl.ask_vlm(images=frame, query="Is there a fire?")
+
+        _, kwargs = mock_requests.post.call_args
+        files = kwargs["files"]
+        assert len(files) == 1
+        assert files[0][0] == "images"
+        name, data, ctype = files[0][1]
+        assert ctype == "image/jpeg"
+        assert len(data) > 0  # bytes were produced
+
+    @patch("groundlight.client.requests")
+    def test_dual_images_sends_two_parts(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+        frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        roi = np.zeros((120, 120, 3), dtype=np.uint8)
+
+        gl.ask_vlm(images=[frame, roi], query="Is there a fire?")
+
+        _, kwargs = mock_requests.post.call_args
+        assert len(kwargs["files"]) == 2
+
+    @patch("groundlight.client.requests")
+    def test_model_id_passed_as_query_param(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response(model_id="us.amazon.nova-pro-v1:0")
+
+        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="test", model_id="us.amazon.nova-pro-v1:0")
+
+        _, kwargs = mock_requests.post.call_args
+        assert kwargs["params"]["model_id"] == "us.amazon.nova-pro-v1:0"
+
+    @patch("groundlight.client.requests")
+    def test_no_model_id_omits_param(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+
+        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
+
+        _, kwargs = mock_requests.post.call_args
+        assert "model_id" not in kwargs["params"]
+
+    def test_more_than_two_images_raises(self, gl):
+        frame = np.zeros((100, 100, 3), dtype=np.uint8)
+        with pytest.raises(ValueError, match="at most 2"):
+            gl.ask_vlm(images=[frame, frame, frame], query="test")
+
+    @patch("groundlight.client.requests")
+    def test_bytes_image_accepted(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+        # A minimal valid JPEG header
+        jpeg_bytes = b"\xff\xd8\xff\xe0" + b"\x00" * 100
+
+        # Should not raise
+        try:
+            gl.ask_vlm(images=jpeg_bytes, query="test")
+        except Exception:
+            pass  # parse_supported_image_types may reject invalid JPEG body; that's fine here

From 9a5e3e199399d79b6fefce6a5b61aebb116f15f2 Mon Sep 17 00:00:00 2001
From: Auto-format Bot <autoformatbot@groundlight.ai>
Date: Thu, 18 Jun 2026 00:11:33 +0000
Subject: [PATCH 2/9] Automatically reformatting code

---
 src/groundlight/client.py | 1 -
 test/unit/test_ask_vlm.py | 7 +++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index d6550af2..9284b1ee 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -9,7 +9,6 @@
 from typing import Any, Callable, List, Optional, Tuple, Union
 
 import requests
-
 from groundlight_openapi_client import Configuration
 from groundlight_openapi_client.api.detector_groups_api import DetectorGroupsApi
 from groundlight_openapi_client.api.detectors_api import DetectorsApi
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index 911d4f4f..134c8974 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -1,12 +1,9 @@
 """Unit tests for Groundlight.ask_vlm — mocks HTTP, no live server needed."""
 
-import json
-from io import BytesIO
 from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
-
 from groundlight import Groundlight, VLMVerificationResult
 
 
@@ -19,7 +16,9 @@ def gl(monkeypatch):
     return client
 
 
-def _mock_response(verdict="YES", confidence=0.92, reasoning="Flames visible.", model_id="us.anthropic.claude-sonnet-4-5-20250929-v1:0"):
+def _mock_response(
+    verdict="YES", confidence=0.92, reasoning="Flames visible.", model_id="us.anthropic.claude-sonnet-4-5-20250929-v1:0"
+):
     resp = MagicMock()
     resp.status_code = 201
     resp.json.return_value = {

From d3a428b4fb4195927664a95d99f26bb8e6b3b6cb Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Mon, 22 Jun 2026 01:32:58 -0700
Subject: [PATCH 3/9] Send ask_vlm query/model_id as form fields; use friendly
 model alias

- POST query and model_id as multipart form fields (data=) instead of
  query-string params, matching the updated endpoint and keeping long
  prompts out of URLs and access logs.
- model_id is now a friendly alias (e.g. "gpt-5.4", "claude-sonnet-4.5")
  resolved server-side, not a raw Bedrock model ID.
- Tests updated to assert form-field transport.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/groundlight/client.py | 14 ++++++++------
 test/unit/test_ask_vlm.py | 16 ++++++++++------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index 9284b1ee..578e6a24 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1154,9 +1154,9 @@ def ask_vlm(
 
         :param query: Natural-language prompt describing what to verify, e.g.
             ``"Is there a fire visible in the image? Reason step by step."``
-        :param model_id: AWS Bedrock model ID, e.g.
-            ``"us.anthropic.claude-sonnet-4-5-20250929-v1:0"``.
-            Defaults to the server-configured default.
+        :param model_id: Friendly alias of the VLM to use, e.g. ``"gpt-5.4"`` or
+            ``"claude-sonnet-4.5"``.  Must be one of the models supported by the
+            server.  Defaults to the server-configured default.
         :param timeout: Request timeout in seconds (default 15 s).
 
         :return: :class:`VLMVerificationResult` with ``verdict`` (``"YES"`` / ``"NO"`` /
@@ -1176,9 +1176,11 @@ def ask_vlm(
             jpeg_bytes = stream.read()
             image_files.append(("images", (f"image_{i}.jpg", jpeg_bytes, "image/jpeg")))
 
-        params: dict[str, str] = {"query": query}
+        # query and model_id are sent as multipart form fields (not query-string
+        # params): the prompt can be long and must not end up in URLs or access logs.
+        form_data: dict[str, str] = {"query": query}
         if model_id:
-            params["model_id"] = model_id
+            form_data["model_id"] = model_id
 
         headers = {
             "x-api-token": self.api_client.configuration.api_key["ApiToken"],
@@ -1190,7 +1192,7 @@ def ask_vlm(
 
         resp = requests.post(
             url,
-            params=params,
+            data=form_data,
             files=image_files,
             headers=headers,
             timeout=timeout,
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index 134c8974..82863beb 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -75,22 +75,26 @@ def test_dual_images_sends_two_parts(self, mock_requests, gl):
         assert len(kwargs["files"]) == 2
 
     @patch("groundlight.client.requests")
-    def test_model_id_passed_as_query_param(self, mock_requests, gl):
-        mock_requests.post.return_value = _mock_response(model_id="us.amazon.nova-pro-v1:0")
+    def test_query_and_model_id_sent_as_form_fields(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response(model_id="nova-pro")
 
-        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="test", model_id="us.amazon.nova-pro-v1:0")
+        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?", model_id="nova-pro")
 
         _, kwargs = mock_requests.post.call_args
-        assert kwargs["params"]["model_id"] == "us.amazon.nova-pro-v1:0"
+        # Text fields go in the multipart body, never the URL query string.
+        assert kwargs["data"]["query"] == "Is there a fire?"
+        assert kwargs["data"]["model_id"] == "nova-pro"
+        assert "params" not in kwargs or not kwargs["params"]
 
     @patch("groundlight.client.requests")
-    def test_no_model_id_omits_param(self, mock_requests, gl):
+    def test_no_model_id_omits_field(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response()
 
         gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
 
         _, kwargs = mock_requests.post.call_args
-        assert "model_id" not in kwargs["params"]
+        assert "model_id" not in kwargs["data"]
+        assert kwargs["data"]["query"] == "test"
 
     def test_more_than_two_images_raises(self, gl):
         frame = np.zeros((100, 100, 3), dtype=np.uint8)

From 2b20fce27d17cf5fb4b9a226739790121ef33fe7 Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Mon, 22 Jun 2026 02:47:27 -0700
Subject: [PATCH 4/9] Update ask_vlm model_id docstring examples to
 vision-capable aliases

Drop the gpt-5.4 example (OpenAI models on Bedrock are text-only and cannot
do image verification); use claude-sonnet-4.5 / nova-pro instead.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/groundlight/client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index 578e6a24..b3311224 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1154,9 +1154,9 @@ def ask_vlm(
 
         :param query: Natural-language prompt describing what to verify, e.g.
             ``"Is there a fire visible in the image? Reason step by step."``
-        :param model_id: Friendly alias of the VLM to use, e.g. ``"gpt-5.4"`` or
-            ``"claude-sonnet-4.5"``.  Must be one of the models supported by the
-            server.  Defaults to the server-configured default.
+        :param model_id: Friendly alias of the VLM to use, e.g.
+            ``"claude-sonnet-4.5"`` or ``"nova-pro"``.  Must be one of the
+            models supported by the server.  Defaults to the server-configured default.
         :param timeout: Request timeout in seconds (default 15 s).
 
         :return: :class:`VLMVerificationResult` with ``verdict`` (``"YES"`` / ``"NO"`` /

From 320887b4e5b109cb3251bd229cea00250234db57 Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Mon, 22 Jun 2026 14:42:13 -0700
Subject: [PATCH 5/9] ask_vlm: rename images -> media, accept up to 8

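Match the generalized endpoint: the `images` parameter is renamed to `media`,
the multipart field name becomes 'media', and the item-count guard is raised
from 2 to 8. The query should describe each media item (the server makes no
frame/ROI assumption). Docstring + tests updated.

NOTE(review): the docstring hunk in this patch also changes the model_id
examples back to "gpt-5.4" / "claude-sonnet-4.5", undoing PATCH 4/9, which
dropped gpt-5.4 as text-only. This looks unintentional -- confirm.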

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/groundlight/client.py | 45 +++++++++++++++++++++------------------
 test/unit/test_ask_vlm.py | 20 ++++++++---------
 2 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index b3311224..c71a0dc3 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1109,7 +1109,7 @@ def ask_async(  # noqa: PLR0913 # pylint: disable=too-many-arguments
 
     def ask_vlm(
         self,
-        images: Union[
+        media: Union[
             "np.ndarray",
             List["np.ndarray"],
             str,
@@ -1122,40 +1122,43 @@ def ask_vlm(
         model_id: Optional[str] = None,
         timeout: float = 15.0,
     ) -> VLMVerificationResult:
-        """Verify one or two images against a natural-language query using a cloud VLM.
+        """Verify one or more images against a natural-language query using a cloud VLM.
 
         Calls the Groundlight ``POST /v1/vlm-queries`` endpoint.  The VLM runs in the
         Groundlight cloud (AWS Bedrock) — no local inference.
 
+        The server makes no assumptions about what the images are — your ``query`` should
+        describe them. Images are presented to the model labeled ``Image 1``, ``Image 2``,
+        ... in the order given, so the query can refer to them.
+
         **Example usage**::
 
             gl = Groundlight()
 
-            # Single-image verification
-            result = gl.ask_vlm(image=frame, query="Is there a fire?")
+            # Single image
+            result = gl.ask_vlm(frame, query="Is there a fire in this image?")
             if result.verdict == "YES":
                 emit_alert()
 
-            # Dual-image (full frame + ROI) for better context
+            # Full frame + cropped ROI — describe each in the query
             result = gl.ask_vlm(
-                images=[full_frame, roi_crop],
-                query="Is there a fire in the highlighted region?",
+                media=[full_frame, roi_crop],
+                query="Image 1 is the full camera frame; image 2 is the cropped region "
+                      "a detector flagged. Is there really a fire?",
             )
             print(result.confidence, result.reasoning)
 
-        :param images: One image or a list of up to two images.  When two images are
-            provided the first is treated as the **full camera frame** and the second
-            as the **cropped region of interest (ROI)**.  Accepted formats per image:
+        :param media: One image or a list of up to 8 images.  Accepted formats per image:
 
             - filename (string) of a JPEG/PNG file
             - raw bytes or BytesIO / BufferedReader
             - numpy array (H, W, 3) in BGR order (OpenCV convention)
             - PIL Image
 
-        :param query: Natural-language prompt describing what to verify, e.g.
-            ``"Is there a fire visible in the image? Reason step by step."``
+        :param query: Natural-language prompt describing the media and what to verify,
+            e.g. ``"Is there a fire visible in the image? Reason step by step."``
         :param model_id: Friendly alias of the VLM to use, e.g.
-            ``"claude-sonnet-4.5"`` or ``"nova-pro"``.  Must be one of the
+            ``"gpt-5.4"`` or ``"claude-sonnet-4.5"``.  Must be one of the
             models supported by the server.  Defaults to the server-configured default.
         :param timeout: Request timeout in seconds (default 15 s).
 
@@ -1164,17 +1167,17 @@ def ask_vlm(
         :raises requests.HTTPError: On non-2xx response from the server.
         """
         # Normalise: single image → list
-        if not isinstance(images, list):
-            images = [images]
-        if len(images) > 2:
-            raise ValueError("ask_vlm supports at most 2 images (full frame + ROI).")
+        if not isinstance(media, list):
+            media = [media]
+        if len(media) > 8:
+            raise ValueError("ask_vlm supports at most 8 media items.")
 
         # Convert each image to JPEG bytes via the existing SDK utility
-        image_files: list[tuple[str, tuple[str, bytes, str]]] = []
-        for i, img in enumerate(images):
+        media_files: list[tuple[str, tuple[str, bytes, str]]] = []
+        for i, img in enumerate(media):
             stream = parse_supported_image_types(img)
             jpeg_bytes = stream.read()
-            image_files.append(("images", (f"image_{i}.jpg", jpeg_bytes, "image/jpeg")))
+            media_files.append(("media", (f"image_{i}.jpg", jpeg_bytes, "image/jpeg")))
 
         # query and model_id are sent as multipart form fields (not query-string
         # params): the prompt can be long and must not end up in URLs or access logs.
@@ -1193,7 +1196,7 @@ def ask_vlm(
         resp = requests.post(
             url,
             data=form_data,
-            files=image_files,
+            files=media_files,
             headers=headers,
             timeout=timeout,
             verify=self.api_client.configuration.verify_ssl,
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index 82863beb..832b2002 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -39,7 +39,7 @@ class TestAskVlm:
     def test_returns_vlm_verification_result(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response()
 
-        result = gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?")
+        result = gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?")
 
         assert isinstance(result, VLMVerificationResult)
         assert result.verdict == "YES"
@@ -53,12 +53,12 @@ def test_single_numpy_image_encoded_as_jpeg(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response()
         frame = np.zeros((480, 640, 3), dtype=np.uint8)
 
-        gl.ask_vlm(images=frame, query="Is there a fire?")
+        gl.ask_vlm(media=frame, query="Is there a fire?")
 
         _, kwargs = mock_requests.post.call_args
         files = kwargs["files"]
         assert len(files) == 1
-        assert files[0][0] == "images"
+        assert files[0][0] == "media"
         name, data, ctype = files[0][1]
         assert ctype == "image/jpeg"
         assert len(data) > 0  # bytes were produced
@@ -69,7 +69,7 @@ def test_dual_images_sends_two_parts(self, mock_requests, gl):
         frame = np.zeros((480, 640, 3), dtype=np.uint8)
         roi = np.zeros((120, 120, 3), dtype=np.uint8)
 
-        gl.ask_vlm(images=[frame, roi], query="Is there a fire?")
+        gl.ask_vlm(media=[frame, roi], query="Is there a fire?")
 
         _, kwargs = mock_requests.post.call_args
         assert len(kwargs["files"]) == 2
@@ -78,7 +78,7 @@ def test_dual_images_sends_two_parts(self, mock_requests, gl):
     def test_query_and_model_id_sent_as_form_fields(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response(model_id="nova-pro")
 
-        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?", model_id="nova-pro")
+        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?", model_id="nova-pro")
 
         _, kwargs = mock_requests.post.call_args
         # Text fields go in the multipart body, never the URL query string.
@@ -90,16 +90,16 @@ def test_query_and_model_id_sent_as_form_fields(self, mock_requests, gl):
     def test_no_model_id_omits_field(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response()
 
-        gl.ask_vlm(images=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
+        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
 
         _, kwargs = mock_requests.post.call_args
         assert "model_id" not in kwargs["data"]
         assert kwargs["data"]["query"] == "test"
 
-    def test_more_than_two_images_raises(self, gl):
+    def test_more_than_eight_media_raises(self, gl):
         frame = np.zeros((100, 100, 3), dtype=np.uint8)
-        with pytest.raises(ValueError, match="at most 2"):
-            gl.ask_vlm(images=[frame, frame, frame], query="test")
+        with pytest.raises(ValueError, match="at most 8"):
+            gl.ask_vlm(media=[frame] * 9, query="test")
 
     @patch("groundlight.client.requests")
     def test_bytes_image_accepted(self, mock_requests, gl):
@@ -109,6 +109,6 @@ def test_bytes_image_accepted(self, mock_requests, gl):
 
         # Should not raise
         try:
-            gl.ask_vlm(images=jpeg_bytes, query="test")
+            gl.ask_vlm(media=jpeg_bytes, query="test")
         except Exception:
             pass  # parse_supported_image_types may reject invalid JPEG body; that's fine here

From 00789e0de5222eb5e60f9f8686a1f3846eab40ae Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Tue, 23 Jun 2026 19:20:17 -0700
Subject: [PATCH 6/9] ask_vlm: point at renamed /v1/vlm-verifications endpoint

Endpoint renamed server-side from vlm-queries to vlm-verifications. Update the
SDK POST path and test fixtures accordingly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/groundlight/client.py | 4 ++--
 test/unit/test_ask_vlm.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index c71a0dc3..dc951f58 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1124,7 +1124,7 @@ def ask_vlm(
     ) -> VLMVerificationResult:
         """Verify one or more images against a natural-language query using a cloud VLM.
 
-        Calls the Groundlight ``POST /v1/vlm-queries`` endpoint.  The VLM runs in the
+        Calls the Groundlight ``POST /v1/vlm-verifications`` endpoint.  The VLM runs in the
         Groundlight cloud (AWS Bedrock) — no local inference.
 
         The server makes no assumptions about what the images are — your ``query`` should
@@ -1191,7 +1191,7 @@ def ask_vlm(
             "x-sdk-language": "python",
         }
 
-        url = f"{self.endpoint}v1/vlm-queries"
+        url = f"{self.endpoint}v1/vlm-verifications"
 
         resp = requests.post(
             url,
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index 832b2002..bc82f3af 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -22,8 +22,8 @@ def _mock_response(
     resp = MagicMock()
     resp.status_code = 201
     resp.json.return_value = {
-        "id": "vlmq_test123",
-        "type": "vlm_query",
+        "id": "vlmv_test123",
+        "type": "vlm_verification",
         "created_at": "2025-06-17T00:00:00Z",
         "query": "Is there a fire?",
         "model_id": model_id,
@@ -44,7 +44,7 @@ def test_returns_vlm_verification_result(self, mock_requests, gl):
         assert isinstance(result, VLMVerificationResult)
         assert result.verdict == "YES"
         assert result.confidence == pytest.approx(0.92)
-        assert result.id == "vlmq_test123"
+        assert result.id == "vlmv_test123"
         assert result.input_tokens == 400
         assert result.total_cost_usd == pytest.approx(0.0015)
 

From 263808d98a624b07daf4afe4a81decb45f2cc95a Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Wed, 24 Jun 2026 00:51:53 -0700
Subject: [PATCH 7/9] fix: correct URL path separator and add regression test

sanitize_endpoint_url() strips the trailing slash from self.endpoint, so
joining without "/" produced ".../device-apiv1/vlm-verifications" instead
of ".../device-api/v1/vlm-verifications".

Added test_url_has_correct_path to pin the correct URL shape.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/groundlight/client.py |  2 +-
 test/unit/test_ask_vlm.py | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index dc951f58..d60b2c62 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1191,7 +1191,7 @@ def ask_vlm(
             "x-sdk-language": "python",
         }
 
-        url = f"{self.endpoint}v1/vlm-verifications"
+        url = f"{self.endpoint}/v1/vlm-verifications"
 
         resp = requests.post(
             url,
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index bc82f3af..aab2d743 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -74,6 +74,19 @@ def test_dual_images_sends_two_parts(self, mock_requests, gl):
         _, kwargs = mock_requests.post.call_args
         assert len(kwargs["files"]) == 2
 
+    @patch("groundlight.client.requests")
+    def test_url_has_correct_path(self, mock_requests, gl):
+        mock_requests.post.return_value = _mock_response()
+
+        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
+
+        args, _ = mock_requests.post.call_args
+        url = args[0]
+        # sanitize_endpoint_url strips the trailing slash, so we must insert "/" before
+        # the path — without it the URL would be "...device-apiv1/vlm-verifications".
+        assert url.endswith("/v1/vlm-verifications"), f"Bad URL: {url}"
+        assert "/device-api/v1/vlm-verifications" in url
+
     @patch("groundlight.client.requests")
     def test_query_and_model_id_sent_as_form_fields(self, mock_requests, gl):
         mock_requests.post.return_value = _mock_response(model_id="nova-pro")

From 3cfbb7e7f82a74dac7e0877df1a25c2e030f92c6 Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Wed, 24 Jun 2026 14:53:56 -0700
Subject: [PATCH 8/9] address PR comments: model list in docstring,
 timeout/corrupted-image tests

- model_id docstring now lists all current supported aliases with a note
  that the server is the source of truth (400 on unknown alias)
- documents that corrupted bytes are validated server-side -> HTTPError 400
- rewrites test_ask_vlm.py as module-level functions matching repo convention
- adds test_timeout_passed_to_requests: verifies timeout kwarg forwarded
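- adds test_corrupted_image_bytes_raises_http_error: a mocked 400 response
  whose raise_for_status() raises propagates out of ask_vlm (the mock raises
  a plain Exception, so the requests.HTTPError type itself is not asserted)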

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/groundlight/client.py |  20 ++++-
 test/unit/test_ask_vlm.py | 157 ++++++++++++++++++++------------------
 2 files changed, 99 insertions(+), 78 deletions(-)

diff --git a/src/groundlight/client.py b/src/groundlight/client.py
index d60b2c62..4107f530 100644
--- a/src/groundlight/client.py
+++ b/src/groundlight/client.py
@@ -1157,14 +1157,26 @@ def ask_vlm(
 
         :param query: Natural-language prompt describing the media and what to verify,
             e.g. ``"Is there a fire visible in the image? Reason step by step."``
-        :param model_id: Friendly alias of the VLM to use, e.g.
-            ``"gpt-5.4"`` or ``"claude-sonnet-4.5"``.  Must be one of the
-            models supported by the server.  Defaults to the server-configured default.
+        :param model_id: Friendly alias of the VLM to use.  The server is the source
+            of truth; passing an unrecognised alias returns HTTP 400.  Currently
+            supported aliases:
+
+            - ``"gpt-5.4"`` — OpenAI GPT-5.4 via Bedrock Responses API (default)
+            - ``"claude-sonnet-4.5"`` — Anthropic Claude Sonnet 4.5
+            - ``"claude-haiku-3"`` — Anthropic Claude Haiku 3
+            - ``"nova-pro"`` — Amazon Nova Pro
+            - ``"nova-lite"`` — Amazon Nova Lite
+            - ``"llama3.2-90b"`` — Meta Llama 3.2 90B
+            - ``"llama3.2-11b"`` — Meta Llama 3.2 11B
+
+            Omit to use the server-configured default (currently ``"gpt-5.4"``).
         :param timeout: Request timeout in seconds (default 15 s).
 
         :return: :class:`VLMVerificationResult` with ``verdict`` (``"YES"`` / ``"NO"`` /
             ``"UNSURE"``), ``confidence``, ``reasoning``, and token cost fields.
-        :raises requests.HTTPError: On non-2xx response from the server.
+        :raises ValueError: If more than 8 media items are supplied.
+        :raises requests.HTTPError: On non-2xx response (400 for invalid model alias
+            or undecodable image bytes; 502 if the upstream VLM is unavailable).
         """
         # Normalise: single image → list
         if not isinstance(media, list):
diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index aab2d743..81ab281e 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -1,5 +1,6 @@
-"""Unit tests for Groundlight.ask_vlm — mocks HTTP, no live server needed."""
+"""Unit tests for Groundlight.ask_vlm — all HTTP mocked, no live server needed."""
 
+from unittest import mock
 from unittest.mock import MagicMock, patch
 
 import numpy as np
@@ -7,18 +8,14 @@
 from groundlight import Groundlight, VLMVerificationResult
 
 
-@pytest.fixture
-def gl(monkeypatch):
+@pytest.fixture(name="gl")
+def groundlight_fixture(monkeypatch) -> Groundlight:
     monkeypatch.setenv("GROUNDLIGHT_API_TOKEN", "api_fake_test_token")
-    # Avoid the live /v1/me connectivity check performed during __init__.
     with patch.object(Groundlight, "_verify_connectivity", return_value=None):
-        client = Groundlight(endpoint="http://test-server/device-api/")
-    return client
+        return Groundlight(endpoint="http://test-server/device-api/")
 
 
-def _mock_response(
-    verdict="YES", confidence=0.92, reasoning="Flames visible.", model_id="us.anthropic.claude-sonnet-4-5-20250929-v1:0"
-):
+def _mock_response(verdict="YES", confidence=0.92, reasoning="Flames visible.", model_id="gpt-5.4"):
     resp = MagicMock()
     resp.status_code = 201
     resp.json.return_value = {
@@ -34,94 +31,106 @@ def _mock_response(
     return resp
 
 
-class TestAskVlm:
-    @patch("groundlight.client.requests")
-    def test_returns_vlm_verification_result(self, mock_requests, gl):
+def test_returns_vlm_verification_result(gl: Groundlight):
+    """ask_vlm returns a typed VLMVerificationResult with all expected fields populated."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-
         result = gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?")
 
-        assert isinstance(result, VLMVerificationResult)
-        assert result.verdict == "YES"
-        assert result.confidence == pytest.approx(0.92)
-        assert result.id == "vlmv_test123"
-        assert result.input_tokens == 400
-        assert result.total_cost_usd == pytest.approx(0.0015)
+    assert isinstance(result, VLMVerificationResult)
+    assert result.verdict == "YES"
+    assert result.confidence == pytest.approx(0.92)
+    assert result.id == "vlmv_test123"
+    assert result.input_tokens == 400
+    assert result.total_cost_usd == pytest.approx(0.0015)
+
 
-    @patch("groundlight.client.requests")
-    def test_single_numpy_image_encoded_as_jpeg(self, mock_requests, gl):
+def test_single_numpy_image_encoded_as_jpeg(gl: Groundlight):
+    """A numpy array is encoded to JPEG and sent as a single multipart 'media' part."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-        frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        gl.ask_vlm(media=np.zeros((480, 640, 3), dtype=np.uint8), query="Is there a fire?")
 
-        gl.ask_vlm(media=frame, query="Is there a fire?")
+    _, kwargs = mock_requests.post.call_args
+    files = kwargs["files"]
+    assert len(files) == 1
+    assert files[0][0] == "media"
+    _name, data, ctype = files[0][1]
+    assert ctype == "image/jpeg"
+    assert len(data) > 0
 
-        _, kwargs = mock_requests.post.call_args
-        files = kwargs["files"]
-        assert len(files) == 1
-        assert files[0][0] == "media"
-        name, data, ctype = files[0][1]
-        assert ctype == "image/jpeg"
-        assert len(data) > 0  # bytes were produced
 
-    @patch("groundlight.client.requests")
-    def test_dual_images_sends_two_parts(self, mock_requests, gl):
+def test_dual_images_sends_two_parts(gl: Groundlight):
+    """Passing a list of two images sends two 'media' multipart parts."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-        frame = np.zeros((480, 640, 3), dtype=np.uint8)
-        roi = np.zeros((120, 120, 3), dtype=np.uint8)
+        gl.ask_vlm(
+            media=[np.zeros((480, 640, 3), dtype=np.uint8), np.zeros((120, 120, 3), dtype=np.uint8)],
+            query="Is there a fire?",
+        )
 
-        gl.ask_vlm(media=[frame, roi], query="Is there a fire?")
+    _, kwargs = mock_requests.post.call_args
+    assert len(kwargs["files"]) == 2
 
-        _, kwargs = mock_requests.post.call_args
-        assert len(kwargs["files"]) == 2
 
-    @patch("groundlight.client.requests")
-    def test_url_has_correct_path(self, mock_requests, gl):
+def test_url_has_correct_path(gl: Groundlight):
+    """sanitize_endpoint_url strips the trailing slash, so we must insert '/' before
+    the path — without it the URL would be '...device-apiv1/vlm-verifications'."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-
         gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
 
-        args, _ = mock_requests.post.call_args
-        url = args[0]
-        # sanitize_endpoint_url strips the trailing slash, so we must insert "/" before
-        # the path — without it the URL would be "...device-apiv1/vlm-verifications".
-        assert url.endswith("/v1/vlm-verifications"), f"Bad URL: {url}"
-        assert "/device-api/v1/vlm-verifications" in url
+    args, _ = mock_requests.post.call_args
+    url = args[0]
+    assert "/device-api/v1/vlm-verifications" in url
 
-    @patch("groundlight.client.requests")
-    def test_query_and_model_id_sent_as_form_fields(self, mock_requests, gl):
-        mock_requests.post.return_value = _mock_response(model_id="nova-pro")
 
+def test_query_and_model_id_sent_as_form_fields(gl: Groundlight):
+    """query and model_id go in the multipart body, never in the URL query string."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
+        mock_requests.post.return_value = _mock_response(model_id="nova-pro")
         gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?", model_id="nova-pro")
 
-        _, kwargs = mock_requests.post.call_args
-        # Text fields go in the multipart body, never the URL query string.
-        assert kwargs["data"]["query"] == "Is there a fire?"
-        assert kwargs["data"]["model_id"] == "nova-pro"
-        assert "params" not in kwargs or not kwargs["params"]
+    _, kwargs = mock_requests.post.call_args
+    assert kwargs["data"]["query"] == "Is there a fire?"
+    assert kwargs["data"]["model_id"] == "nova-pro"
+    assert "params" not in kwargs or not kwargs.get("params")
 
-    @patch("groundlight.client.requests")
-    def test_no_model_id_omits_field(self, mock_requests, gl):
-        mock_requests.post.return_value = _mock_response()
 
+def test_no_model_id_omits_field(gl: Groundlight):
+    """Omitting model_id leaves the field out entirely so the server uses its default."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
+        mock_requests.post.return_value = _mock_response()
         gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
 
-        _, kwargs = mock_requests.post.call_args
-        assert "model_id" not in kwargs["data"]
-        assert kwargs["data"]["query"] == "test"
+    _, kwargs = mock_requests.post.call_args
+    assert "model_id" not in kwargs["data"]
 
-    def test_more_than_eight_media_raises(self, gl):
-        frame = np.zeros((100, 100, 3), dtype=np.uint8)
-        with pytest.raises(ValueError, match="at most 8"):
-            gl.ask_vlm(media=[frame] * 9, query="test")
 
-    @patch("groundlight.client.requests")
-    def test_bytes_image_accepted(self, mock_requests, gl):
+def test_more_than_eight_media_raises(gl: Groundlight):
+    """Supplying more than 8 media items raises ValueError before any network call."""
+    with pytest.raises(ValueError, match="at most 8"):
+        gl.ask_vlm(media=[np.zeros((100, 100, 3), dtype=np.uint8)] * 9, query="test")
+
+
+def test_timeout_passed_to_requests(gl: Groundlight):
+    """The timeout parameter is forwarded to requests.post."""
+    with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-        # A minimal valid JPEG header
-        jpeg_bytes = b"\xff\xd8\xff\xe0" + b"\x00" * 100
-
-        # Should not raise
-        try:
-            gl.ask_vlm(media=jpeg_bytes, query="test")
-        except Exception:
-            pass  # parse_supported_image_types may reject invalid JPEG body; that's fine here
+        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test", timeout=5.0)
+
+    _, kwargs = mock_requests.post.call_args
+    assert kwargs["timeout"] == pytest.approx(5.0)
+
+
+def test_corrupted_image_bytes_raises_http_error(gl: Groundlight):
+    """Corrupted bytes are not validated client-side — the server rejects them with a
+    400, which raise_for_status() converts to requests.HTTPError."""
+    error_resp = MagicMock()
+    error_resp.status_code = 400
+    error_resp.raise_for_status.side_effect = Exception("400 Bad Request")
+
+    with mock.patch("groundlight.client.requests") as mock_requests:
+        mock_requests.post.return_value = error_resp
+        with pytest.raises(Exception, match="400"):
+            gl.ask_vlm(media=b"this-is-not-a-valid-image", query="test")

From 7216313d8b6cd0b5a9e57e6e9a1c032496ce263b Mon Sep 17 00:00:00 2001
From: buildci <noreply@taservs.net>
Date: Wed, 24 Jun 2026 16:09:48 -0700
Subject: [PATCH 9/9] trim ask_vlm tests to meaningful coverage only

Drop tests that only verify kwarg passthroughs or mock server-side
behavior (timeout forwarding, corrupted-image 400, dual-image loop,
model_id omission). Keep the five that catch real issues or verify
non-obvious invariants: result parsing, image encoding, form-field
vs URL security property, >8 guard, and the URL path bug.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 test/unit/test_ask_vlm.py | 70 +++++++--------------------------------
 1 file changed, 12 insertions(+), 58 deletions(-)

diff --git a/test/unit/test_ask_vlm.py b/test/unit/test_ask_vlm.py
index 81ab281e..d6e829d7 100644
--- a/test/unit/test_ask_vlm.py
+++ b/test/unit/test_ask_vlm.py
@@ -32,7 +32,7 @@ def _mock_response(verdict="YES", confidence=0.92, reasoning="Flames visible.",
 
 
 def test_returns_vlm_verification_result(gl: Groundlight):
-    """ask_vlm returns a typed VLMVerificationResult with all expected fields populated."""
+    """Result fields are correctly unpacked from the server response JSON."""
     with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
         result = gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?")
@@ -45,8 +45,8 @@ def test_returns_vlm_verification_result(gl: Groundlight):
     assert result.total_cost_usd == pytest.approx(0.0015)
 
 
-def test_single_numpy_image_encoded_as_jpeg(gl: Groundlight):
-    """A numpy array is encoded to JPEG and sent as a single multipart 'media' part."""
+def test_numpy_image_encoded_as_jpeg_multipart(gl: Groundlight):
+    """A numpy array is converted to JPEG and sent as a multipart 'media' part."""
     with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
         gl.ask_vlm(media=np.zeros((480, 640, 3), dtype=np.uint8), query="Is there a fire?")
@@ -60,33 +60,9 @@ def test_single_numpy_image_encoded_as_jpeg(gl: Groundlight):
     assert len(data) > 0
 
 
-def test_dual_images_sends_two_parts(gl: Groundlight):
-    """Passing a list of two images sends two 'media' multipart parts."""
-    with mock.patch("groundlight.client.requests") as mock_requests:
-        mock_requests.post.return_value = _mock_response()
-        gl.ask_vlm(
-            media=[np.zeros((480, 640, 3), dtype=np.uint8), np.zeros((120, 120, 3), dtype=np.uint8)],
-            query="Is there a fire?",
-        )
-
-    _, kwargs = mock_requests.post.call_args
-    assert len(kwargs["files"]) == 2
-
-
-def test_url_has_correct_path(gl: Groundlight):
-    """sanitize_endpoint_url strips the trailing slash, so we must insert '/' before
-    the path — without it the URL would be '...device-apiv1/vlm-verifications'."""
-    with mock.patch("groundlight.client.requests") as mock_requests:
-        mock_requests.post.return_value = _mock_response()
-        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
-
-    args, _ = mock_requests.post.call_args
-    url = args[0]
-    assert "/device-api/v1/vlm-verifications" in url
-
-
-def test_query_and_model_id_sent_as_form_fields(gl: Groundlight):
-    """query and model_id go in the multipart body, never in the URL query string."""
+def test_query_sent_as_form_field_not_url_param(gl: Groundlight):
+    """query and model_id go in the multipart body — never the URL — so the prompt
+    doesn't leak into access logs."""
     with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response(model_id="nova-pro")
         gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="Is there a fire?", model_id="nova-pro")
@@ -97,40 +73,18 @@ def test_query_and_model_id_sent_as_form_fields(gl: Groundlight):
     assert "params" not in kwargs or not kwargs.get("params")
 
 
-def test_no_model_id_omits_field(gl: Groundlight):
-    """Omitting model_id leaves the field out entirely so the server uses its default."""
-    with mock.patch("groundlight.client.requests") as mock_requests:
-        mock_requests.post.return_value = _mock_response()
-        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
-
-    _, kwargs = mock_requests.post.call_args
-    assert "model_id" not in kwargs["data"]
-
-
 def test_more_than_eight_media_raises(gl: Groundlight):
     """Supplying more than 8 media items raises ValueError before any network call."""
     with pytest.raises(ValueError, match="at most 8"):
         gl.ask_vlm(media=[np.zeros((100, 100, 3), dtype=np.uint8)] * 9, query="test")
 
 
-def test_timeout_passed_to_requests(gl: Groundlight):
-    """The timeout parameter is forwarded to requests.post."""
+def test_url_has_correct_path(gl: Groundlight):
+    """sanitize_endpoint_url strips the trailing slash from self.endpoint, so the path
+    must include a leading '/' — without it the URL becomes '...device-apiv1/...'."""
     with mock.patch("groundlight.client.requests") as mock_requests:
         mock_requests.post.return_value = _mock_response()
-        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test", timeout=5.0)
-
-    _, kwargs = mock_requests.post.call_args
-    assert kwargs["timeout"] == pytest.approx(5.0)
-
-
-def test_corrupted_image_bytes_raises_http_error(gl: Groundlight):
-    """Corrupted bytes are not validated client-side — the server rejects them with a
-    400, which raise_for_status() converts to requests.HTTPError."""
-    error_resp = MagicMock()
-    error_resp.status_code = 400
-    error_resp.raise_for_status.side_effect = Exception("400 Bad Request")
+        gl.ask_vlm(media=np.zeros((100, 100, 3), dtype=np.uint8), query="test")
 
-    with mock.patch("groundlight.client.requests") as mock_requests:
-        mock_requests.post.return_value = error_resp
-        with pytest.raises(Exception, match="400"):
-            gl.ask_vlm(media=b"this-is-not-a-valid-image", query="test")
+    args, _ = mock_requests.post.call_args
+    assert "/device-api/v1/vlm-verifications" in args[0]