diff --git a/README.md b/README.md
index e5a2b52..df4e7a7 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Command-line and Python client for downloading and deploying datasets on DBpedia
- [Download](#cli-download)
- [Deploy](#cli-deploy)
- [Delete](#cli-delete)
+ - [Manifest](#cli-manifest)
- [Module Usage](#module-usage)
- [Deploy](#module-deploy)
- [Development & Contributing](#development--contributing)
@@ -556,6 +557,55 @@ databusclient delete https://databus.dbpedia.org/dbpedia/collections/dbpedia-sna
docker run --rm -v $(pwd):/data dbpedia/databus-python-client delete https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12 --databus-key YOUR_API_KEY
```
+
+### Manifest
+
+All three commands support an optional `--manifest` flag that writes a structured JSON-LD record of the operation to disk:
+
+**Download**
+```bash
+# Python
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=az.ttl.bz2 --manifest ./manifests/download-run.jsonld
+# Docker
+docker run --rm -v $(pwd):/data dbpedia/databus-python-client download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=az.ttl.bz2 --manifest ./manifests/download-run.jsonld
+```
+
+**Deploy**
+```bash
+# Python
+databusclient deploy \
+ --version-id https://databus.dbpedia.org/user1/group1/artifact1/2022-05-18 \
+ --title "Client Testing" --abstract "Testing the client...." \
+ --description "Testing the client...." \
+ --license http://dalicc.net/licenselibrary/AdaptivePublicLicense10 \
+ --apikey YOUR_KEY --manifest ./manifests/deploy-run.jsonld \
+ 'https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml|type=swagger'
+# Docker
+docker run --rm -v $(pwd):/data dbpedia/databus-python-client deploy \
+ --version-id https://databus.dbpedia.org/user1/group1/artifact1/2022-05-18 \
+ --title "Client Testing" --abstract "Testing the client...." \
+ --description "Testing the client...." \
+ --license http://dalicc.net/licenselibrary/AdaptivePublicLicense10 \
+ --apikey YOUR_KEY --manifest ./manifests/deploy-run.jsonld \
+ 'https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml|type=swagger'
+```
+**Delete**
+```bash
+# Python
+databusclient delete https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01 --databus-key YOUR_API_KEY --manifest ./manifests/delete-run.jsonld
+# Docker
+docker run --rm -v $(pwd):/data dbpedia/databus-python-client delete https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01 --databus-key YOUR_API_KEY --manifest ./manifests/delete-run.jsonld
+```
+
+The manifest records input parameters, per-file URLs, checksums, byte sizes, timestamps, and success/failure status for each file. It uses the DataID vocabulary and is versioned via `dbus:schemaVersion`.
+
+- If the target path already exists, the manifest is written to an auto-suffixed path (e.g. `run_1.jsonld`) with a warning.
+- Sensitive fields (API keys, vault tokens) are never written.
+- If manifest writing fails, a warning is printed and the exit code reflects the actual operation result.
+- If the operation itself fails, a `dbus:operationError` block is recorded in the manifest capturing the error type, message, and traceback.
+
+See `examples/reproducible-download.md` for a full walkthrough.
+
## Module Usage
@@ -675,4 +725,4 @@ Or to ensure compatibility with the `pyproject.toml` configured dependencies, ru
```bash
poetry run pytest tests/
-```
\ No newline at end of file
+```
diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py
index 8e2d916..199e5a4 100644
--- a/databusclient/api/delete.py
+++ b/databusclient/api/delete.py
@@ -23,13 +23,16 @@ class DeleteQueue:
Allows adding multiple databus URIs to a queue and executing their deletion in batch.
"""
- def __init__(self, databus_key: str):
+ def __init__(self, databus_key: str, manifest_context=None):
"""Create a DeleteQueue bound to a given Databus API key.
Args:
databus_key: API key used to authenticate deletion requests.
+ manifest_context: Optional ManifestContext to record deletion
+ outcomes into. Passed through to _delete_list on execute().
"""
self.databus_key = databus_key
+ self.manifest_context = manifest_context
self.queue: set[str] = set()
def add_uri(self, databusURI: str):
@@ -69,11 +72,13 @@ def execute(self):
"""Execute all queued deletions.
Each queued URI will be deleted using `_delete_resource`.
+ Passes manifest_context through so deletions are recorded.
"""
_delete_list(
list(self.sorted_queue()),
self.databus_key,
force=True,
+ manifest_context=self.manifest_context,
)
@@ -116,6 +121,7 @@ def _delete_resource(
dry_run: bool = False,
force: bool = False,
queue: DeleteQueue = None,
+ manifest_context=None,
):
"""Delete a single Databus resource (version, artifact, group).
@@ -144,6 +150,8 @@ def _delete_resource(
if dry_run:
print(f"[DRY RUN] Would delete: {databusURI}")
+ if manifest_context is not None:
+ manifest_context.record_file(url=databusURI, status="dry_run")
return
if queue is not None:
@@ -156,6 +164,8 @@ def _delete_resource(
if response.status_code in (200, 204):
print(f"Successfully deleted: {databusURI}")
+ if manifest_context is not None:
+ manifest_context.record_file(url=databusURI, status="success")
else:
raise Exception(
f"Failed to delete {databusURI}: {response.status_code} - {response.text}"
@@ -168,6 +178,7 @@ def _delete_list(
dry_run: bool = False,
force: bool = False,
queue: DeleteQueue = None,
+ manifest_context=None,
):
"""Delete a list of Databus resources.
@@ -180,7 +191,7 @@ def _delete_list(
"""
for databusURI in databusURIs:
_delete_resource(
- databusURI, databus_key, dry_run=dry_run, force=force, queue=queue
+ databusURI, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
@@ -190,6 +201,7 @@ def _delete_artifact(
dry_run: bool = False,
force: bool = False,
queue: DeleteQueue = None,
+ manifest_context=None,
):
"""Delete an artifact and all its versions.
@@ -223,11 +235,11 @@ def _delete_artifact(
else:
# Delete all versions
_delete_list(
- version_uris, databus_key, dry_run=dry_run, force=force, queue=queue
+ version_uris, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
# Finally, delete the artifact itself
- _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force, queue=queue)
+ _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force, queue=queue,manifest_context=manifest_context)
def _delete_group(
@@ -236,6 +248,7 @@ def _delete_group(
dry_run: bool = False,
force: bool = False,
queue: DeleteQueue = None,
+ manifest_context=None,
):
"""Delete a group and all its artifacts and versions.
@@ -266,14 +279,14 @@ def _delete_group(
# Delete all artifacts (which deletes their versions)
for artifact_uri in artifact_uris:
_delete_artifact(
- artifact_uri, databus_key, dry_run=dry_run, force=force, queue=queue
+ artifact_uri, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
# Finally, delete the group itself
- _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force, queue=queue)
+ _delete_resource(databusURI, databus_key, dry_run=dry_run, force=force, queue=queue,manifest_context=manifest_context)
-def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool):
+def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool, manifest_context=None):
"""Delete a dataset from the databus.
Delete a group, artifact, or version identified by the given databus URI.
@@ -286,7 +299,7 @@ def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool)
force: If True, skip confirmation prompt and proceed with deletion.
"""
- queue = DeleteQueue(databus_key)
+ queue = DeleteQueue(databus_key, manifest_context=manifest_context)
for databusURI in databusURIs:
_host, _account, group, artifact, version, file = (
@@ -296,24 +309,24 @@ def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool)
if group == "collections" and artifact is not None:
print(f"Deleting collection: {databusURI}")
_delete_resource(
- databusURI, databus_key, dry_run=dry_run, force=force, queue=queue
+ databusURI, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
elif file is not None:
print(f"Deleting file is not supported via API: {databusURI}")
elif version is not None:
print(f"Deleting version: {databusURI}")
_delete_resource(
- databusURI, databus_key, dry_run=dry_run, force=force, queue=queue
+ databusURI, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
elif artifact is not None:
print(f"Deleting artifact and all its versions: {databusURI}")
_delete_artifact(
- databusURI, databus_key, dry_run=dry_run, force=force, queue=queue
+ databusURI, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
elif group is not None and group != "collections":
print(f"Deleting group and all its artifacts and versions: {databusURI}")
_delete_group(
- databusURI, databus_key, dry_run=dry_run, force=force, queue=queue
+ databusURI, databus_key, dry_run=dry_run, force=force, queue=queue, manifest_context=manifest_context
)
else:
print(f"Deleting {databusURI} is not supported.")
diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index bd71555..82fe672 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -12,6 +12,7 @@
import requests
from SPARQLWrapper import JSON, SPARQLWrapper
from tqdm import tqdm
+from datetime import datetime, timezone
from databusclient.api.utils import (
fetch_databus_jsonld,
@@ -327,6 +328,7 @@ def _download_file(
base_uri=None,
validate_checksum: bool = False,
expected_checksum: str | None = None,
+ manifest_context=None,
) -> None:
"""Download a file from the internet with a progress bar using tqdm.
@@ -495,32 +497,43 @@ def _download_file(
raise IOError("Downloaded size does not match Content-Length header")
# --- 6. Validate checksum on original downloaded file (BEFORE conversion) ---
+ actual_checksum = None
if validate_checksum:
- # reuse compute_sha256_and_length from webdav extension
try:
- actual, _ = compute_sha256_and_length(filename)
+ actual_checksum, _ = compute_sha256_and_length(filename)
except (OSError, IOError) as e:
print(f"WARNING: error computing checksum for {filename}: {e}")
- actual = None
+ actual_checksum = None
if expected_checksum is None:
print(
f"WARNING: no expected checksum available for {filename}; skipping validation"
)
- elif actual is None:
+ elif actual_checksum is None:
print(
f"WARNING: could not compute checksum for {filename}; skipping validation"
)
else:
- if actual.lower() != expected_checksum.lower():
+ if actual_checksum.lower() != expected_checksum.lower():
try:
- os.remove(filename) # delete corrupted file
+ os.remove(filename)
except OSError:
pass
raise IOError(
- f"Checksum mismatch for {filename}: expected {expected_checksum}, got {actual}"
+ f"Checksum mismatch for {filename}: expected {expected_checksum}, got {actual_checksum}"
)
+ # # Record file to manifest after all verification passes.
+ # # Use actual computed checksum if available, otherwise fall back to expected.
+ # if manifest_context is not None:
+ # manifest_context.record_file(
+ # url=url,
+ # status="success",
+ # sha256=actual_checksum or expected_checksum,
+ # size_bytes=total_size_in_bytes if total_size_in_bytes else None,
+ # downloaded_at=datetime.now(timezone.utc).isoformat(),
+ # )
+
# --- 7. Unified compression/format conversion pass ---
source_compression = _detect_compression_format(file)
should_convert_compression, source_fmt = _should_convert_compression(
@@ -529,6 +542,14 @@ def _download_file(
needs_format_conversion = convert_format is not None
if not should_convert_compression and not needs_format_conversion:
+ if manifest_context is not None:
+ manifest_context.record_file(
+ url=url,
+ status="success",
+ sha256=actual_checksum or expected_checksum,
+ size_bytes=total_size_in_bytes if total_size_in_bytes else None,
+ downloaded_at=datetime.now(timezone.utc).isoformat(),
+ )
return
temp_paths: list[str] = []
@@ -701,6 +722,16 @@ def _download_file(
if os.path.exists(temp_path):
os.remove(temp_path)
+ # Record file to manifest only after all conversion completes successfully.
+ # This ensures the manifest reflects the actual final output, not just the download.
+ if manifest_context is not None:
+ manifest_context.record_file(
+ url=url,
+ status="success",
+ sha256=actual_checksum or expected_checksum,
+ size_bytes=total_size_in_bytes if total_size_in_bytes else None,
+ downloaded_at=datetime.now(timezone.utc).isoformat(),
+ )
def _download_files(
urls: List[str],
@@ -713,6 +744,7 @@ def _download_files(
convert_format: str = None,
graph_name: str = None,
base_uri: str = None,
+ manifest_context=None,
validate_checksum: bool = False,
checksums: dict | None = None,
) -> None:
@@ -749,6 +781,7 @@ def _download_files(
base_uri=base_uri,
validate_checksum=validate_checksum,
expected_checksum=expected,
+ manifest_context=manifest_context,
)
def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str:
@@ -896,6 +929,7 @@ def _download_collection(
convert_format: str = None,
graph_name: str = None,
base_uri: str = None,
+ manifest_context=None,
validate_checksum: bool = False,
) -> None:
"""Download all files in a databus collection.
@@ -935,6 +969,7 @@ def _download_collection(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
checksums=checksums if checksums else None,
)
@@ -951,6 +986,7 @@ def _download_version(
convert_format: str = None,
graph_name: str = None,
base_uri: str = None,
+ manifest_context=None,
validate_checksum: bool = False,
) -> None:
"""Download all files in a databus artifact version.
@@ -988,6 +1024,7 @@ def _download_version(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
checksums=checksums,
)
@@ -1005,6 +1042,7 @@ def _download_artifact(
convert_format: str = None,
graph_name: str = None,
base_uri: str = None,
+ manifest_context=None,
validate_checksum: bool = False,
) -> None:
"""Download files in a databus artifact.
@@ -1049,6 +1087,7 @@ def _download_artifact(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
checksums=checksums,
)
@@ -1127,6 +1166,7 @@ def _download_group(
convert_format: str = None,
graph_name: str = None,
base_uri: str = None,
+ manifest_context=None,
validate_checksum: bool = False,
) -> None:
"""Download files in a databus group.
@@ -1161,6 +1201,7 @@ def _download_group(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
)
@@ -1213,6 +1254,7 @@ def download(
graph_name=None,
base_uri=None,
validate_checksum: bool = False,
+ manifest_context=None,
) -> None:
"""Download datasets from databus.
@@ -1262,6 +1304,7 @@ def download(
convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
)
elif file is not None:
@@ -1285,6 +1328,7 @@ def download(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
expected_checksum=expected,
)
@@ -1301,6 +1345,7 @@ def download(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
)
elif artifact is not None:
@@ -1319,6 +1364,7 @@ def download(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
)
elif group is not None and group != "collections":
@@ -1337,6 +1383,7 @@ def download(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
)
elif account is not None:
@@ -1377,6 +1424,7 @@ def download(
convert_format=convert_format,
graph_name=graph_name,
base_uri=base_uri,
+ manifest_context=manifest_context,
validate_checksum=validate_checksum,
checksums=checksums if checksums else None,
)
\ No newline at end of file
diff --git a/databusclient/cli.py b/databusclient/cli.py
index 277d0d6..ed35aa5 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -8,6 +8,8 @@
import databusclient.api.deploy as api_deploy
from databusclient.api.delete import delete as api_delete
from databusclient.api.download import download as api_download, DownloadAuthError
+from databusclient.manifest.context import ManifestContext
+from databusclient.manifest.writer import ManifestWriter
from databusclient.extensions import webdav
@@ -59,6 +61,12 @@ def app():
"webdav_url",
help="WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)",
)
+@click.option(
+ "--manifest",
+ "manifest_path",
+ default=None,
+ help="Write a JSON-LD manifest of this operation to PATH.",
+)
@click.option("--remote", help="rclone remote name (e.g., 'nextcloud')")
@click.option("--path", help="Remote path on Nextcloud (e.g., 'datasets/mydataset')")
@click.argument("distributions", nargs=-1)
@@ -74,6 +82,7 @@ def deploy(
remote,
path,
distributions: List[str],
+ manifest_path,
):
"""
Flexible deploy to Databus command supporting three modes:\n
@@ -91,31 +100,80 @@ def deploy(
raise click.UsageError(
"Invalid combination: when using WebDAV/Nextcloud mode, please provide --webdav-url, --remote, and --path together."
)
+
+ manifest_context = None
+ if manifest_path:
+ manifest_context = ManifestContext(command="deploy")
+ manifest_context.record_params({
+ "version_id": version_id,
+ "title": title,
+ "abstract": abstract,
+ "description": description,
+ "license_url": license_url,
+ "distributions": list(distributions) if distributions else [],
+ "metadata_file": metadata_file,
+ })
+
+ def _write_manifest():
+ if manifest_path and manifest_context is not None:
+ try:
+ actual_path = ManifestWriter.write(manifest_context, manifest_path)
+ click.echo(f"Manifest written to {actual_path}")
+ except (OSError, IOError) as e:
+ click.echo(
+ f"WARNING: Manifest could not be written to {manifest_path}: {e}",
+ err=True,
+ )
# === Mode 1: Classic Deploy ===
if distributions and not (metadata_file or webdav_url or remote or path):
click.echo("[MODE] Classic deploy with distributions")
click.echo(f"Deploying dataset version: {version_id}")
-
- dataid = api_deploy.create_dataset(
- version_id=version_id,
- artifact_version_title=title,
- artifact_version_abstract=abstract,
- artifact_version_description=description,
- license_url=license_url,
- distributions=distributions,
- )
- api_deploy.deploy(dataid=dataid, api_key=apikey)
+ try:
+ dataid = api_deploy.create_dataset(
+ version_id=version_id,
+ artifact_version_title=title,
+ artifact_version_abstract=abstract,
+ artifact_version_description=description,
+ license_url=license_url,
+ distributions=distributions,
+ )
+ api_deploy.deploy(dataid=dataid, api_key=apikey)
+ if manifest_context:
+ for dist in distributions:
+ url = str(dist).split("|")[0]
+ manifest_context.record_file(url=url, status="success")
+ except Exception as exc:
+ if manifest_context:
+ manifest_context.record_operation_error(exc)
+ raise
+ finally:
+ _write_manifest()
return
# === Mode 2: Metadata File ===
if metadata_file:
click.echo(f"[MODE] Deploy from metadata file: {metadata_file}")
- with open(metadata_file, "r") as f:
- metadata = json.load(f)
- api_deploy.deploy_from_metadata(
- metadata, version_id, title, abstract, description, license_url, apikey
- )
+ try:
+ with open(metadata_file, "r") as f:
+ metadata = json.load(f)
+ api_deploy.deploy_from_metadata(
+ metadata, version_id, title, abstract, description, license_url, apikey
+ )
+ if manifest_context:
+ for entry in metadata:
+ manifest_context.record_file(
+ url=entry.get("url", ""),
+ status="success",
+ sha256=entry.get("checksum"),
+ size_bytes=entry.get("size"),
+ )
+ except Exception as exc:
+ if manifest_context:
+ manifest_context.record_operation_error(exc)
+ raise
+ finally:
+ _write_manifest()
return
# === Mode 3: Upload & Deploy (Nextcloud) ===
@@ -124,20 +182,32 @@ def deploy(
raise click.UsageError(
"Please provide files to upload when using WebDAV/Nextcloud mode."
)
-
- # Check that all given paths exist and are files or directories.
invalid = [f for f in distributions if not os.path.exists(f)]
if invalid:
raise click.UsageError(
f"The following input files or folders do not exist: {', '.join(invalid)}"
)
-
click.echo("[MODE] Upload & Deploy to DBpedia Databus via Nextcloud")
click.echo(f"→ Uploading to: {remote}:{path}")
- metadata = webdav.upload_to_webdav(distributions, remote, path, webdav_url)
- api_deploy.deploy_from_metadata(
- metadata, version_id, title, abstract, description, license_url, apikey
- )
+ try:
+ metadata = webdav.upload_to_webdav(distributions, remote, path, webdav_url)
+ api_deploy.deploy_from_metadata(
+ metadata, version_id, title, abstract, description, license_url, apikey
+ )
+ if manifest_context:
+ for entry in metadata:
+ manifest_context.record_file(
+ url=entry.get("url", ""),
+ status="success",
+ sha256=entry.get("checksum"),
+ size_bytes=entry.get("size"),
+ )
+ except Exception as exc:
+ if manifest_context:
+ manifest_context.record_operation_error(exc)
+ raise
+ finally:
+ _write_manifest()
return
raise click.UsageError(
@@ -147,7 +217,6 @@ def deploy(
" - Upload & deploy: use --webdav-url, --remote, --path, and file arguments"
)
-
@app.command()
@click.argument("databusuris", nargs=-1, required=True)
@click.option(
@@ -222,6 +291,12 @@ def deploy(
help="Base URI for CSV -> RDF Triple conversion (Layer 3). "
"Required when converting CSV/TSV to RDF triple formats.",
)
+@click.option(
+ "--manifest",
+ "manifest_path",
+ default=None,
+ help="Write a JSON-LD manifest of this operation to PATH (e.g. --manifest manifest.jsonld).",
+)
@click.option(
"--validate-checksum", is_flag=True, help="Validate checksums of downloaded files"
)
@@ -239,11 +314,38 @@ def download(
graph_name,
base_uri,
validate_checksum,
+ manifest_path,
):
"""
Download datasets from databus, optionally using vault access if vault options are provided.
Supports on-the-fly compression format conversion using --convert-to and --convert-from options.
"""
+ # Determine auth method for manifest (never store the token itself)
+ auth_method = None
+ if vault_token:
+ auth_method = "vault_token"
+ elif databus_key:
+ auth_method = "databus_key"
+
+ manifest_context = None
+ if manifest_path:
+ manifest_context = ManifestContext(
+ command="download",
+ endpoint=databus,
+ auth_method=auth_method,
+ )
+ # Record safe replay params — sensitive fields excluded
+ manifest_context.record_params({
+ "databusURIs": list(databusuris),
+ "compression": compression,
+ "convert_format": convert_format,
+ "graph_name": graph_name,
+ "base_uri": base_uri,
+ "all_versions": all_versions,
+ "validate_checksum": validate_checksum,
+ "authurl": authurl,
+ "clientid": clientid,
+ })
try:
api_download(
localDir=localdir,
@@ -259,11 +361,26 @@ def download(
graph_name=graph_name,
base_uri=base_uri,
validate_checksum=validate_checksum,
+ manifest_context=manifest_context,
)
except DownloadAuthError as e:
+ if manifest_context:
+ manifest_context.record_operation_error(e)
raise click.ClickException(str(e))
except ValueError as e:
+ if manifest_context:
+ manifest_context.record_operation_error(e)
raise click.ClickException(str(e))
+ finally:
+ if manifest_path and manifest_context is not None:
+ try:
+ actual_path = ManifestWriter.write(manifest_context, manifest_path)
+ click.echo(f"Manifest written to {actual_path}")
+ except (OSError, IOError) as e:
+ click.echo(
+ f"WARNING: Manifest could not be written to {manifest_path}: {e}",
+ err=True,
+ )
@@ -278,7 +395,13 @@ def download(
@click.option(
"--force", is_flag=True, help="Force deletion without confirmation prompt"
)
-def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool):
+@click.option(
+ "--manifest",
+ "manifest_path",
+ default=None,
+ help="Write a JSON-LD manifest of this operation to PATH.",
+)
+def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool, manifest_path):
"""
Delete a dataset from the databus.
@@ -286,12 +409,36 @@ def delete(databusuris: List[str], databus_key: str, dry_run: bool, force: bool)
Will recursively delete all data associated with the dataset.
"""
- api_delete(
- databusURIs=databusuris,
- databus_key=databus_key,
- dry_run=dry_run,
- force=force,
- )
+ manifest_context = None
+ if manifest_path:
+ manifest_context = ManifestContext(command="delete")
+ manifest_context.record_params({
+ "databusURIs": list(databusuris),
+ "dry_run": dry_run,
+ })
+
+ try:
+ api_delete(
+ databusURIs=databusuris,
+ databus_key=databus_key,
+ dry_run=dry_run,
+ force=force,
+ manifest_context=manifest_context,
+ )
+ except Exception as exc:
+ if manifest_context:
+ manifest_context.record_operation_error(exc)
+ raise
+ finally:
+ if manifest_path and manifest_context is not None:
+ try:
+ actual_path = ManifestWriter.write(manifest_context, manifest_path)
+ click.echo(f"Manifest written to {actual_path}")
+ except (OSError, IOError) as e:
+ click.echo(
+ f"WARNING: Manifest could not be written to {manifest_path}: {e}",
+ err=True,
+ )
if __name__ == "__main__":
diff --git a/databusclient/manifest/__init__.py b/databusclient/manifest/__init__.py
new file mode 100644
index 0000000..791429d
--- /dev/null
+++ b/databusclient/manifest/__init__.py
@@ -0,0 +1,5 @@
+"""Manifest system for the Databus Python Client.
+
+Provides ManifestContext (records operation details in memory)
+and ManifestWriter (serializes to JSON-LD on disk).
+"""
\ No newline at end of file
diff --git a/databusclient/manifest/context.py b/databusclient/manifest/context.py
new file mode 100644
index 0000000..790f909
--- /dev/null
+++ b/databusclient/manifest/context.py
@@ -0,0 +1,159 @@
+"""ManifestContext — in-memory record of a Databus operation.
+
+Created by the CLI when --manifest is passed. Threaded through
+API functions as manifest_context=None. When None, all recording
+calls are no-ops and existing behavior is completely unchanged.
+"""
+
+from __future__ import annotations
+
+import traceback as tb
+from datetime import datetime, timezone
+from typing import Optional
+
+
+class ManifestContext:
+ """Records all details of a Databus operation in memory.
+
+ Created once per CLI invocation when --manifest is passed.
+ Passed as manifest_context parameter to download(), deploy(),
+ delete(). When --manifest is not passed, manifest_context is
+ None and all recording is skipped.
+
+ Sensitive fields (vault_token, api_key, local_dir) are never
+ passed to this class — they are excluded at the CLI layer.
+ """
+
+ def __init__(
+ self,
+ command: str,
+ endpoint: Optional[str] = None,
+ auth_method: Optional[str] = None,
+ ) -> None:
+ """Initialise a new manifest context.
+
+ Args:
+ command: CLI command name ('download', 'deploy', 'delete').
+ endpoint: SPARQL endpoint URL used (if applicable).
+ auth_method: Authentication method used ('vault_token',
+ 'databus_key', or None for public access).
+ """
+ self.command = command
+ self.endpoint = endpoint
+ self.auth_method = auth_method
+ self.issued: str = datetime.now(timezone.utc).isoformat()
+ self.replay_params: dict = {}
+ self.files: list = []
+ self.operation_error: Optional[dict] = None
+
+ def record_params(self, params: dict) -> None:
+ """Save the replay parameters for this operation.
+
+ Stores CLI-level inputs needed to reconstruct the operation.
+ Sensitive fields (vault_token, api_key, local_dir) must be
+ excluded by the caller before passing params here.
+
+ Args:
+ params: Dict of safe CLI parameters to store for replay.
+ """
+ self.replay_params = params
+
+ def record_file(
+ self,
+ url: str,
+ status: str,
+ sha256: Optional[str] = None,
+ size_bytes: Optional[int] = None,
+ compression: Optional[str] = None,
+ downloaded_at: Optional[str] = None,
+ error_message: Optional[str] = None,
+ error_traceback: Optional[str] = None,
+ retry_count: int = 0,
+ ) -> None:
+ """Record the outcome of processing a single file or URI.
+
+ Called after each file download, deploy distribution, or
+ delete operation completes (successfully or not).
+
+ Args:
+ url: The file URL or Databus URI that was processed.
+ status: 'success' or 'failed'.
+ sha256: SHA-256 checksum of the file (if available).
+ size_bytes: File size in bytes (if available).
+ compression: Compression format of the file (if applicable).
+ downloaded_at: ISO-8601 timestamp of when file was processed.
+ error_message: Error description if status is 'failed'.
+ error_traceback: Full traceback string if status is 'failed'.
+ retry_count: Number of retries attempted (default 0).
+ """
+ entry: dict = {
+ "url": url,
+ "status": status,
+ }
+ if sha256 is not None:
+ entry["sha256"] = sha256
+ if size_bytes is not None:
+ entry["size_bytes"] = size_bytes
+ if compression is not None:
+ entry["compression"] = compression
+ if downloaded_at is not None:
+ entry["downloaded_at"] = downloaded_at
+ if error_message is not None:
+ entry["error_message"] = error_message
+ if error_traceback is not None:
+ entry["error_traceback"] = error_traceback
+ if retry_count:
+ entry["retry_count"] = retry_count
+
+ self.files.append(entry)
+
+ def record_file_error(self, url: str, exc: Exception) -> None:
+ """Convenience method to record a failed file from an exception.
+
+ Captures the exception message and full traceback automatically.
+
+ Args:
+ url: The file URL or Databus URI that failed.
+ exc: The exception that caused the failure.
+ """
+ self.record_file(
+ url=url,
+ status="failed",
+ error_message=str(exc),
+ error_traceback=tb.format_exc(),
+ )
+
+ def record_operation_error(self, exc: Exception) -> None:
+ """Record a top-level operation failure.
+
+ Used when the entire operation fails (e.g. DeployError, auth failure)
+ rather than an individual file failing. Captures the exception message
+ and full traceback so the manifest is useful for debugging even when
+ no per-file recording happened.
+
+ Args:
+ exc: The exception that caused the operation to fail.
+ """
+ self.operation_error = {
+ "error_message": str(exc),
+ "error_traceback": tb.format_exc(),
+ "error_type": type(exc).__name__,
+ }
+
+ def summary(self) -> dict:
+ """Return execution summary counts.
+
+ Returns:
+ Dict with total, succeeded, failed counts and total_bytes.
+ """
+ succeeded = sum(1 for f in self.files if f["status"] == "success")
+ failed = sum(1 for f in self.files if f["status"] == "failed")
+ total_bytes = sum(
+ f.get("size_bytes", 0) for f in self.files if f["status"] == "success"
+ )
+ return {
+ "total": len(self.files),
+ "succeeded": succeeded,
+ "failed": failed,
+ "total_bytes": total_bytes,
+ }
\ No newline at end of file
diff --git a/databusclient/manifest/writer.py b/databusclient/manifest/writer.py
new file mode 100644
index 0000000..5e606aa
--- /dev/null
+++ b/databusclient/manifest/writer.py
@@ -0,0 +1,161 @@
+"""ManifestWriter — serializes a ManifestContext to a JSON-LD file.
+
+Called once at the end of a CLI operation. If writing fails,
+a warning is printed and the CLI exits with code 0 (the actual
+operation already succeeded).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+
+from databusclient.manifest.context import ManifestContext
+
+# JSON-LD context using DataID vocabulary — same vocabulary used
+# by the Databus platform itself for semantic interoperability.
+_JSONLD_CONTEXT = {
+ "dataid": "http://dataid.dbpedia.org/ns#",
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "dcterms": "http://purl.org/dc/terms/",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+ "dbus": "http://databus.dbpedia.org/manifest/ns#",
+}
+
+_SCHEMA_VERSION = "1.0"
+_CLIENT_VERSION = "0.15"
+
+
+class ManifestWriter:
+ """Serializes a ManifestContext to a JSON-LD manifest file."""
+
+ @staticmethod
+ def write(context: ManifestContext, path: str) -> str:
+ """Write the manifest to a JSON-LD file at the given path.
+
+ Creates parent directories if they do not exist.
+ If a file already exists at `path`, auto-suffixes with _1, _2, etc.
+ and prints a warning rather than silently overwriting.
+ On failure, raises OSError — callers should catch and warn.
+
+ Args:
+ context: The completed ManifestContext to serialize.
+ path: File path to write the manifest to.
+
+ Raises:
+ OSError: If the file cannot be written, or if path is a directory.
+ """
+ if path.endswith(("/", "\\")) or os.path.isdir(path):
+ stripped = path.rstrip("/\\")
+ raise OSError(
+ f"--manifest path '{path}' is a directory, not a file. "
+ f"Please provide a full file path, e.g. '{stripped}/manifest.jsonld'."
+ )
+
+ summary = context.summary()
+
+ # Build file entries using DataID vocabulary
+ file_entries = []
+ for f in context.files:
+ entry: dict = {
+ "@type": "dataid:File",
+ "dcat:downloadURL": f["url"],
+ "dbus:status": f["status"],
+ }
+ if f.get("sha256"):
+ entry["dataid:checksum"] = f["sha256"]
+ if f.get("size_bytes") is not None:
+ entry["dataid:byteSize"] = f["size_bytes"]
+ if f.get("compression"):
+ entry["dataid:compression"] = f["compression"]
+ if f.get("downloaded_at"):
+ entry["dbus:processedAt"] = {
+ "@value": f["downloaded_at"],
+ "@type": "xsd:dateTime",
+ }
+ if f.get("error_message"):
+ entry["dbus:errorMessage"] = f["error_message"]
+ if f.get("error_traceback"):
+ entry["dbus:errorTraceback"] = f["error_traceback"]
+ if f.get("retry_count"):
+ entry["dbus:retryCount"] = f["retry_count"]
+ file_entries.append(entry)
+
+ manifest = {
+ "@context": _JSONLD_CONTEXT,
+ "@type": "dbus:OperationManifest",
+ "dbus:schemaVersion": _SCHEMA_VERSION,
+ "dbus:clientVersion": _CLIENT_VERSION,
+ "dbus:command": context.command,
+ "dcterms:issued": {
+ "@value": context.issued,
+ "@type": "xsd:dateTime",
+ },
+ }
+
+ if context.endpoint:
+ manifest["dbus:endpoint"] = context.endpoint
+ if context.auth_method:
+ manifest["dbus:authMethod"] = context.auth_method
+ if context.replay_params:
+ manifest["dbus:replayParams"] = context.replay_params
+
+ manifest["dataid:distribution"] = {
+ "@type": "dataid:Distribution",
+ "dataid:file": file_entries,
+ }
+
+ manifest["dbus:executionResult"] = {
+ "@type": "dbus:ExecutionSummary",
+ "dbus:totalFiles": summary["total"],
+ "dbus:succeeded": summary["succeeded"],
+ "dbus:failed": summary["failed"],
+ "dbus:totalBytes": summary["total_bytes"],
+ }
+
+ if context.operation_error:
+ manifest["dbus:operationError"] = {
+ "@type": "dbus:OperationError",
+ "dbus:errorType": context.operation_error["error_type"],
+ "dbus:errorMessage": context.operation_error["error_message"],
+ "dbus:errorTraceback": context.operation_error["error_traceback"],
+ }
+
+ parent = os.path.dirname(os.path.abspath(path))
+ if parent:
+ os.makedirs(parent, exist_ok=True)
+
+ final_path = ManifestWriter._resolve_available_path(path)
+ if final_path != path:
+ print(
+ f"WARNING: manifest already exists at '{path}', "
+ f"creating '{final_path}' instead"
+ )
+
+ with open(final_path, "w", encoding="utf-8") as f:
+ json.dump(manifest, f, indent=2, ensure_ascii=False)
+ return final_path
+
+ @staticmethod
+ def _resolve_available_path(path: str) -> str:
+ """Return a non-colliding path, auto-suffixing with _1, _2, ... if needed.
+
+ If `path` does not exist, it is returned unchanged. If it exists,
+ appends _1, _2, etc. before the extension until a free path is found.
+
+ Args:
+ path: Desired manifest file path.
+
+ Returns:
+ A path that does not currently exist on disk.
+ """
+ if not os.path.exists(path):
+ return path
+
+ base, ext = os.path.splitext(path)
+ counter = 1
+ while True:
+ candidate = f"{base}_{counter}{ext}"
+ if not os.path.exists(candidate):
+ return candidate
+ counter += 1
\ No newline at end of file
diff --git a/examples/reproducible-download.md b/examples/reproducible-download.md
new file mode 100644
index 0000000..d5e1919
--- /dev/null
+++ b/examples/reproducible-download.md
@@ -0,0 +1,39 @@
+# Reproducible Research Download with Manifest
+
+This example shows how to use `--manifest` to record a download
+operation for reproducibility.
+
+## Running the download
+
+```bash
+databusclient download \
+ https://databus.dbpedia.org/dbpedia/generic/labels/2023.12.01 \
+ --localdir ./data \
+ --manifest ./manifests/labels-download.jsonld
+```
+
+This produces:
+- Downloaded files in `./data/`
+- A manifest at `./manifests/labels-download.jsonld` recording:
+ - The exact Databus URIs downloaded
+ - SHA-256 checksum and size of each file
+ - Timestamp of the download
+ - All parameters needed to reproduce the operation
+
+## What the manifest contains
+
+The manifest is a JSON-LD file using the DataID vocabulary.
+Key fields:
+
+- `dbus:replayParams` — the exact parameters used, sufficient to
+ re-run the download six months later and get the same files
+- `dataid:file` — one entry per downloaded file with checksum,
+ size, and status
+- `dbus:executionResult` — summary of succeeded/failed files
+
+## Verifying the download later
+
+Six months later, a colleague can verify the same data was
+downloaded by checking the checksums in the manifest against
+the files on disk, or by inspecting the `dbus:replayParams`
+to understand exactly what was fetched and when.
\ No newline at end of file
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
new file mode 100644
index 0000000..573978c
--- /dev/null
+++ b/tests/test_manifest.py
@@ -0,0 +1,319 @@
+"""Tests for the manifest system (Milestone 2).
+
+Unit tests: ManifestContext records correctly, ManifestWriter
+produces valid JSON-LD.
+Edge case: manifest write failure warns but does not fail operation.
+"""
+
+import json
+import os
+import tempfile
+
+import pytest
+
+from databusclient.manifest.context import ManifestContext
+from databusclient.manifest.writer import ManifestWriter
+
+
+# ---------------------------------------------------------------------------
+# ManifestContext tests
+# ---------------------------------------------------------------------------
+
+def test_context_records_command_and_timestamp():
+ ctx = ManifestContext(command="download")
+ assert ctx.command == "download"
+ assert ctx.issued is not None
+ assert "T" in ctx.issued # ISO-8601 format
+
+
+def test_context_records_params():
+ ctx = ManifestContext(command="download")
+ ctx.record_params({"databusURIs": ["https://example.org/data"], "compression": "gz"})
+ assert ctx.replay_params["databusURIs"] == ["https://example.org/data"]
+ assert ctx.replay_params["compression"] == "gz"
+
+
+def test_context_records_successful_file():
+ ctx = ManifestContext(command="download")
+ ctx.record_file(
+ url="https://example.org/file.ttl",
+ status="success",
+ sha256="abc123",
+ size_bytes=1024,
+ )
+ assert len(ctx.files) == 1
+ assert ctx.files[0]["status"] == "success"
+ assert ctx.files[0]["sha256"] == "abc123"
+ assert ctx.files[0]["size_bytes"] == 1024
+
+
+def test_context_records_failed_file():
+ ctx = ManifestContext(command="download")
+ ctx.record_file(
+ url="https://example.org/file.ttl",
+ status="failed",
+ error_message="Connection timeout",
+ error_traceback="Traceback...",
+ )
+ assert ctx.files[0]["status"] == "failed"
+ assert ctx.files[0]["error_message"] == "Connection timeout"
+
+
+def test_context_record_file_error_convenience():
+ ctx = ManifestContext(command="download")
+ try:
+ raise ValueError("test error")
+ except ValueError as e:
+ ctx.record_file_error("https://example.org/file.ttl", e)
+ assert ctx.files[0]["status"] == "failed"
+ assert "test error" in ctx.files[0]["error_message"]
+ assert ctx.files[0]["error_traceback"] is not None
+
+
+def test_context_summary_counts():
+ ctx = ManifestContext(command="download")
+ ctx.record_file(url="https://a.org/1", status="success", size_bytes=100)
+ ctx.record_file(url="https://a.org/2", status="success", size_bytes=200)
+ ctx.record_file(url="https://a.org/3", status="failed")
+ s = ctx.summary()
+ assert s["total"] == 3
+ assert s["succeeded"] == 2
+ assert s["failed"] == 1
+ assert s["total_bytes"] == 300
+
+
+def test_context_sensitive_fields_not_stored():
+ """Sensitive fields must never appear in replay_params."""
+ ctx = ManifestContext(command="download", auth_method="vault_token")
+ ctx.record_params({
+ "databusURIs": ["https://example.org"],
+ "compression": "gz",
+ })
+ # auth_method is stored (it's safe — describes the method, not the credential)
+ assert ctx.auth_method == "vault_token"
+ # but the actual token must not be in replay_params
+ assert "vault_token" not in ctx.replay_params
+ assert "databus_key" not in ctx.replay_params
+ assert "token" not in ctx.replay_params
+
+
+# ---------------------------------------------------------------------------
+# ManifestWriter tests
+# ---------------------------------------------------------------------------
+
+def test_writer_produces_valid_jsonld():
+ ctx = ManifestContext(command="download", endpoint="https://databus.dbpedia.org/sparql")
+ ctx.record_params({"databusURIs": ["https://example.org/data"]})
+ ctx.record_file(
+ url="https://example.org/file.ttl",
+ status="success",
+ sha256="abc123",
+ size_bytes=1024,
+ )
+
+ with tempfile.NamedTemporaryFile(suffix=".jsonld", delete=False) as f:
+ path = f.name
+ os.remove(path) # NamedTemporaryFile creates an empty file; remove it so write() doesn't auto-suffix
+ try:
+ actual_path = ManifestWriter.write(ctx, path)
+ with open(actual_path, "r", encoding="utf-8") as f:
+ manifest = json.load(f)
+
+ assert manifest["@type"] == "dbus:OperationManifest"
+ assert manifest["dbus:command"] == "download"
+ assert manifest["dbus:schemaVersion"] == "1.0"
+ assert "dcterms:issued" in manifest
+ assert manifest["dbus:endpoint"] == "https://databus.dbpedia.org/sparql"
+ assert manifest["dbus:replayParams"]["databusURIs"] == ["https://example.org/data"]
+
+ files = manifest["dataid:distribution"]["dataid:file"]
+ assert len(files) == 1
+ assert files[0]["dcat:downloadURL"] == "https://example.org/file.ttl"
+ assert files[0]["dbus:status"] == "success"
+ assert files[0]["dataid:checksum"] == "abc123"
+ assert files[0]["dataid:byteSize"] == 1024
+
+ result = manifest["dbus:executionResult"]
+ assert result["dbus:totalFiles"] == 1
+ assert result["dbus:succeeded"] == 1
+ assert result["dbus:failed"] == 0
+ finally:
+ if os.path.exists(actual_path):
+ os.remove(actual_path)
+
+
+def test_writer_creates_parent_directories():
+ ctx = ManifestContext(command="delete")
+ ctx.record_file(url="https://example.org/v1", status="success")
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ path = os.path.join(tmpdir, "nested", "dir", "manifest.jsonld")
+ ManifestWriter.write(ctx, path)
+ assert os.path.exists(path)
+
+
+def test_writer_records_failed_file():
+ ctx = ManifestContext(command="download")
+ ctx.record_file(
+ url="https://example.org/file.ttl",
+ status="failed",
+ error_message="Timeout",
+ error_traceback="Traceback...",
+ )
+
+ with tempfile.NamedTemporaryFile(suffix=".jsonld", delete=False) as f:
+ path = f.name
+ os.remove(path)
+ try:
+ actual_path = ManifestWriter.write(ctx, path)
+ with open(actual_path, "r", encoding="utf-8") as f:
+ manifest = json.load(f)
+ files = manifest["dataid:distribution"]["dataid:file"]
+ assert files[0]["dbus:status"] == "failed"
+ assert files[0]["dbus:errorMessage"] == "Timeout"
+ finally:
+ if os.path.exists(actual_path):
+ os.remove(actual_path)
+
+
+def test_writer_failure_raises_oserror():
+ """Writer raises OSError on invalid path — caller should catch and warn.
+
+ Uses a file as the parent directory, which is always invalid on all
+ platforms (Windows and Unix) since you cannot create a directory
+ inside a file.
+ """
+ ctx = ManifestContext(command="download")
+ ctx.record_file(url="https://example.org/f", status="success")
+
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
+ file_as_parent = f.name
+
+ try:
+ # Use an existing file as if it were a parent directory —
+ # always raises OSError on all platforms
+ invalid_path = os.path.join(file_as_parent, "manifest.jsonld")
+ with pytest.raises(OSError):
+ ManifestWriter.write(ctx, invalid_path)
+ finally:
+ if os.path.exists(file_as_parent):
+ os.remove(file_as_parent)
+
+def test_writer_auto_suffix_on_collision():
+ """If the manifest path already exists, auto-suffix with _1 and warn."""
+ ctx1 = ManifestContext(command="download")
+ ctx1.record_file(url="https://example.org/f1", status="success")
+
+ ctx2 = ManifestContext(command="download")
+ ctx2.record_file(url="https://example.org/f2", status="success")
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ path = os.path.join(tmpdir, "run.jsonld")
+
+ first_path = ManifestWriter.write(ctx1, path)
+ assert first_path == path
+
+ second_path = ManifestWriter.write(ctx2, path)
+ assert second_path == os.path.join(tmpdir, "run_1.jsonld")
+
+ # Original file must be untouched (still has ctx1's data)
+ with open(first_path, "r", encoding="utf-8") as f:
+ original = json.load(f)
+ assert original["dataid:distribution"]["dataid:file"][0]["dcat:downloadURL"] == "https://example.org/f1"
+
+ with open(second_path, "r", encoding="utf-8") as f:
+ suffixed = json.load(f)
+ assert suffixed["dataid:distribution"]["dataid:file"][0]["dcat:downloadURL"] == "https://example.org/f2"
+
+
+def test_writer_auto_suffix_increments():
+ """Repeated collisions increment the suffix: _1, then _2."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ path = os.path.join(tmpdir, "run.jsonld")
+
+ ctx_a = ManifestContext(command="download")
+ ctx_b = ManifestContext(command="download")
+ ctx_c = ManifestContext(command="download")
+
+ path_a = ManifestWriter.write(ctx_a, path)
+ path_b = ManifestWriter.write(ctx_b, path)
+ path_c = ManifestWriter.write(ctx_c, path)
+
+ assert path_a == path
+ assert path_b == os.path.join(tmpdir, "run_1.jsonld")
+ assert path_c == os.path.join(tmpdir, "run_2.jsonld")
+
+
+def test_writer_rejects_invalid_path():
+ """Directory paths (existing dir, or trailing slash) raise OSError."""
+ ctx = ManifestContext(command="download")
+ ctx.record_file(url="https://example.org/f", status="success")
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ # Case 1: path is an existing directory
+ with pytest.raises(OSError, match="is a directory"):
+ ManifestWriter.write(ctx, tmpdir)
+
+ # Case 2: path ends with a trailing slash
+ trailing_slash_path = os.path.join(tmpdir, "subdir") + os.sep
+ with pytest.raises(OSError, match="is a directory"):
+ ManifestWriter.write(ctx, trailing_slash_path)
+
+def test_context_records_operation_error():
+ """record_operation_error captures exception type, message, and traceback."""
+ ctx = ManifestContext(command="deploy")
+ try:
+ raise ValueError("Authentication failed.")
+ except ValueError as e:
+ ctx.record_operation_error(e)
+
+ assert ctx.operation_error is not None
+ assert ctx.operation_error["error_type"] == "ValueError"
+ assert "Authentication failed." in ctx.operation_error["error_message"]
+ assert ctx.operation_error["error_traceback"] is not None
+
+
+def test_writer_includes_operation_error():
+ """ManifestWriter writes dbus:operationError when operation_error is set."""
+ ctx = ManifestContext(command="deploy")
+ ctx.record_params({"version_id": "https://example.org/v1"})
+ try:
+ raise RuntimeError("DeployError: bad API key")
+ except RuntimeError as e:
+ ctx.record_operation_error(e)
+
+ with tempfile.NamedTemporaryFile(suffix=".jsonld", delete=False) as f:
+ path = f.name
+ os.remove(path)
+ try:
+ actual_path = ManifestWriter.write(ctx, path)
+ with open(actual_path, "r", encoding="utf-8") as f:
+ manifest = json.load(f)
+
+ assert "dbus:operationError" in manifest
+ err = manifest["dbus:operationError"]
+ assert err["@type"] == "dbus:OperationError"
+ assert err["dbus:errorType"] == "RuntimeError"
+ assert "bad API key" in err["dbus:errorMessage"]
+ assert err["dbus:errorTraceback"] is not None
+ finally:
+ if os.path.exists(actual_path):
+ os.remove(actual_path)
+
+
+def test_writer_no_operation_error_field_when_success():
+ """dbus:operationError is absent from manifest when operation succeeded."""
+ ctx = ManifestContext(command="download")
+ ctx.record_file(url="https://example.org/f", status="success")
+
+ with tempfile.NamedTemporaryFile(suffix=".jsonld", delete=False) as f:
+ path = f.name
+ os.remove(path)
+ try:
+ actual_path = ManifestWriter.write(ctx, path)
+ with open(actual_path, "r", encoding="utf-8") as f:
+ manifest = json.load(f)
+ assert "dbus:operationError" not in manifest
+ finally:
+ if os.path.exists(actual_path):
+ os.remove(actual_path)
\ No newline at end of file