refactor: comprehensive code review fixes across 31 files

Addresses all critical, high, medium, and low issues from full codebase review. 494 tests pass, ruff clean, ty type-check clean. Security: - Add tool_error_handler context manager (exceptions.py) — standardised error handling, eliminates 11 bare except-reraise patterns - Remove unused exception subclasses (ConfigurationError, UnraidAPIError, SubscriptionError, ValidationError, IdempotentOperationError) - Harden GraphQL subscription query validator with allow-list and forbidden-keyword regex (diagnostics.py) - Add input validation for rclone create_remote config_data: injection, path-traversal, and key-count limits (rclone.py) - Validate notifications importance enum before GraphQL request (notifications.py) - Sanitise HTTP/network/JSON error messages — no raw exception strings leaked to clients (client.py) - Strip path/creds from displayed API URL via _safe_display_url (health.py) - Enable Ruff S (bandit) rule category in pyproject.toml - Harden container mutations to strict-only matching — no fuzzy/substring for destructive operations (docker.py) Performance: - Token-bucket rate limiter (90 tokens, 9 req/s) with 429 retry backoff (client.py) - Lazy asyncio.Lock init via _get_client_lock() — fixes event-loop module-load crash (client.py) - Double-checked locking in get_http_client() for fast-path (client.py) - Short hex container ID fast-path skips list fetch (docker.py) - Cap resource_data log content to 1 MB / 5,000 lines (manager.py) - Reset reconnect counter after 30 s stable connection (manager.py) - Move tail_lines validation to module level; enforce 10,000 line cap (storage.py, docker.py) - force_terminal=True removed from logging RichHandler (logging.py) Architecture: - Register diagnostic tools in server startup (server.py) - Move ALL_ACTIONS computation to module level in all tools - Consolidate format_kb / format_bytes into shared core/utils.py - Add _safe_get() helper in core/utils.py for nested dict traversal - Extract _analyze_subscription_status() from health.py diagnose handler - Validate required config at startup — fail fast with CRITICAL log (server.py) Code quality: - Remove ~90 lines of dead Rich formatting helpers from logging.py - Remove dead self.websocket attribute from SubscriptionManager - Remove dead setup_uvicorn_logging() wrapper - Move _VALID_IMPORTANCE to module level (N806 fix) - Add slots=True to all three dataclasses (SubscriptionData, SystemHealth, APIResponse) - Fix None rendering as literal "None" string in info.py summaries - Change fuzzy-match log messages from INFO to DEBUG (docker.py) - UTC-aware datetimes throughout (manager.py, diagnostics.py) Infrastructure: - Upgrade base image python:3.11-slim → python:3.12-slim (Dockerfile) - Add non-root appuser (UID/GID 1000) with HEALTHCHECK (Dockerfile) - Add read_only, cap_drop: ALL, tmpfs /tmp to docker-compose.yml - Single-source version via importlib.metadata (pyproject.toml → __init__.py) - Add open_timeout to all websockets.connect() calls Tests: - Update error message matchers to match sanitised messages (test_client.py) - Fix patch targets for UNRAID_API_URL → utils module (test_subscriptions.py) - Fix importance="info" → importance="normal" (test_notifications.py, http_layer) - Fix naive datetime fixtures → UTC-aware (test_subscriptions.py) Co-authored-by: Claude <claude@anthropic.com>
2026-02-18 01:02:13 -05:00
parent 5b6a728f45
commit 316193c04b
32 changed files with 995 additions and 622 deletions
--- a/unraid_mcp/tools/docker.py
+++ b/unraid_mcp/tools/docker.py
@@ -11,7 +11,8 @@ from fastmcp import FastMCP

 from ..config.logging import logger
 from ..core.client import make_graphql_request
-from ..core.exceptions import ToolError
+from ..core.exceptions import ToolError, tool_error_handler
+from ..core.utils import safe_get


 QUERIES: dict[str, str] = {
@@ -99,6 +100,10 @@ MUTATIONS: dict[str, str] = {
 }

 DESTRUCTIVE_ACTIONS = {"remove"}
+_MUTATION_ACTIONS = {"start", "stop", "restart", "pause", "unpause", "remove", "update"}
+# NOTE (Code-M-07): "details" and "logs" are listed here because they require a
+# container_id parameter, but unlike mutations they use fuzzy name matching (not
+# strict). This is intentional: read-only queries are safe with fuzzy matching.
 _ACTIONS_REQUIRING_CONTAINER_ID = {
    "start",
    "stop",
@@ -111,6 +116,7 @@ _ACTIONS_REQUIRING_CONTAINER_ID = {
    "logs",
 }
 ALL_ACTIONS = set(QUERIES) | set(MUTATIONS) | {"restart"}
+_MAX_TAIL_LINES = 10_000

 DOCKER_ACTIONS = Literal[
    "list",
@@ -130,33 +136,28 @@ DOCKER_ACTIONS = Literal[
    "check_updates",
 ]

-# Docker container IDs: 64 hex chars + optional suffix (e.g., ":local")
+# Full PrefixedID: 64 hex chars + optional suffix (e.g., ":local")
 _DOCKER_ID_PATTERN = re.compile(r"^[a-f0-9]{64}(:[a-z0-9]+)?$", re.IGNORECASE)

-
-def _safe_get(data: dict[str, Any], *keys: str, default: Any = None) -> Any:
-    """Safely traverse nested dict keys, handling None intermediates."""
-    current = data
-    for key in keys:
-        if not isinstance(current, dict):
-            return default
-        current = current.get(key)
-    return current if current is not None else default
+# Short hex prefix: at least 12 hex chars (standard Docker short ID length)
+_DOCKER_SHORT_ID_PATTERN = re.compile(r"^[a-f0-9]{12,63}$", re.IGNORECASE)


 def find_container_by_identifier(
-    identifier: str, containers: list[dict[str, Any]]
+    identifier: str, containers: list[dict[str, Any]], *, strict: bool = False
 ) -> dict[str, Any] | None:
-    """Find a container by ID or name with fuzzy matching.
+    """Find a container by ID or name with optional fuzzy matching.

    Match priority:
      1. Exact ID match
      2. Exact name match (case-sensitive)
+
+    When strict=False (default), also tries:
      3. Name starts with identifier (case-insensitive)
      4. Name contains identifier as substring (case-insensitive)

-    Note: Short identifiers (e.g. "db") may match unintended containers
-    via substring. Use more specific names or IDs for precision.
+    When strict=True, only exact matches (1 & 2) are used.
+    Use strict=True for mutations to prevent targeting the wrong container.
    """
    if not containers:
        return None
@@ -168,20 +169,24 @@ def find_container_by_identifier(
        if identifier in c.get("names", []):
            return c

+    # Strict mode: no fuzzy matching allowed
+    if strict:
+        return None
+
    id_lower = identifier.lower()

    # Priority 3: prefix match (more precise than substring)
    for c in containers:
        for name in c.get("names", []):
            if name.lower().startswith(id_lower):
-                logger.info(f"Prefix match: '{identifier}' -> '{name}'")
+                logger.debug(f"Prefix match: '{identifier}' -> '{name}'")
                return c

    # Priority 4: substring match (least precise)
    for c in containers:
        for name in c.get("names", []):
            if id_lower in name.lower():
-                logger.info(f"Substring match: '{identifier}' -> '{name}'")
+                logger.debug(f"Substring match: '{identifier}' -> '{name}'")
                return c

    return None
@@ -195,27 +200,62 @@ def get_available_container_names(containers: list[dict[str, Any]]) -> list[str]
    return names


-async def _resolve_container_id(container_id: str) -> str:
-    """Resolve a container name/identifier to its actual PrefixedID."""
+def _looks_like_container_id(identifier: str) -> bool:
+    """Check if an identifier looks like a container ID (full or short hex prefix)."""
+    return bool(_DOCKER_ID_PATTERN.match(identifier) or _DOCKER_SHORT_ID_PATTERN.match(identifier))
+
+
+async def _resolve_container_id(container_id: str, *, strict: bool = False) -> str:
+    """Resolve a container name/identifier to its actual PrefixedID.
+
+    Optimization: if the identifier is a full 64-char hex ID (with optional
+    :suffix), skip the container list fetch entirely and use it directly.
+    If it's a short hex prefix (12-63 chars), fetch the list and match by
+    ID prefix. Only fetch the container list for name-based lookups.
+
+    Args:
+        container_id: Container name or ID to resolve
+        strict: When True, only exact name/ID matches are allowed (no fuzzy).
+                Use for mutations to prevent targeting the wrong container.
+    """
+    # Full PrefixedID: skip the list fetch entirely
    if _DOCKER_ID_PATTERN.match(container_id):
        return container_id

-    logger.info(f"Resolving container identifier '{container_id}'")
+    logger.info(f"Resolving container identifier '{container_id}' (strict={strict})")
    list_query = """
        query ResolveContainerID {
          docker { containers(skipCache: true) { id names } }
        }
    """
    data = await make_graphql_request(list_query)
-    containers = _safe_get(data, "docker", "containers", default=[])
-    resolved = find_container_by_identifier(container_id, containers)
+    containers = safe_get(data, "docker", "containers", default=[])
+
+    # Short hex prefix: match by ID prefix before trying name matching
+    if _DOCKER_SHORT_ID_PATTERN.match(container_id):
+        id_lower = container_id.lower()
+        for c in containers:
+            cid = (c.get("id") or "").lower()
+            if cid.startswith(id_lower) or cid.split(":")[0].startswith(id_lower):
+                actual_id = str(c.get("id", ""))
+                logger.info(f"Resolved short ID '{container_id}' -> '{actual_id}'")
+                return actual_id
+
+    resolved = find_container_by_identifier(container_id, containers, strict=strict)
    if resolved:
        actual_id = str(resolved.get("id", ""))
        logger.info(f"Resolved '{container_id}' -> '{actual_id}'")
        return actual_id

    available = get_available_container_names(containers)
-    msg = f"Container '{container_id}' not found."
+    if strict:
+        msg = (
+            f"Container '{container_id}' not found by exact match. "
+            f"Mutations require an exact container name or full ID — "
+            f"fuzzy/substring matching is not allowed for safety."
+        )
+    else:
+        msg = f"Container '{container_id}' not found."
    if available:
        msg += f" Available: {', '.join(available[:10])}"
    raise ToolError(msg)
@@ -264,38 +304,40 @@ def register_docker_tool(mcp: FastMCP) -> None:
        if action == "network_details" and not network_id:
            raise ToolError("network_id is required for 'network_details' action")

-        try:
+        if tail_lines < 1 or tail_lines > _MAX_TAIL_LINES:
+            raise ToolError(f"tail_lines must be between 1 and {_MAX_TAIL_LINES}, got {tail_lines}")
+
+        with tool_error_handler("docker", action, logger):
            logger.info(f"Executing unraid_docker action={action}")

            # --- Read-only queries ---
            if action == "list":
                data = await make_graphql_request(QUERIES["list"])
-                containers = _safe_get(data, "docker", "containers", default=[])
-                return {"containers": list(containers) if isinstance(containers, list) else []}
+                containers = safe_get(data, "docker", "containers", default=[])
+                return {"containers": containers}

            if action == "details":
+                # Resolve name -> ID first (skips list fetch if already an ID)
+                actual_id = await _resolve_container_id(container_id or "")
                data = await make_graphql_request(QUERIES["details"])
-                containers = _safe_get(data, "docker", "containers", default=[])
-                container = find_container_by_identifier(container_id or "", containers)
-                if container:
-                    return container
-                available = get_available_container_names(containers)
-                msg = f"Container '{container_id}' not found."
-                if available:
-                    msg += f" Available: {', '.join(available[:10])}"
-                raise ToolError(msg)
+                containers = safe_get(data, "docker", "containers", default=[])
+                # Match by resolved ID (exact match, no second list fetch needed)
+                for c in containers:
+                    if c.get("id") == actual_id:
+                        return c
+                raise ToolError(f"Container '{container_id}' not found in details response.")

            if action == "logs":
                actual_id = await _resolve_container_id(container_id or "")
                data = await make_graphql_request(
                    QUERIES["logs"], {"id": actual_id, "tail": tail_lines}
                )
-                return {"logs": _safe_get(data, "docker", "logs")}
+                return {"logs": safe_get(data, "docker", "logs")}

            if action == "networks":
                data = await make_graphql_request(QUERIES["networks"])
                networks = data.get("dockerNetworks", [])
-                return {"networks": list(networks) if isinstance(networks, list) else []}
+                return {"networks": networks}

            if action == "network_details":
                data = await make_graphql_request(QUERIES["network_details"], {"id": network_id})
@@ -303,17 +345,17 @@ def register_docker_tool(mcp: FastMCP) -> None:

            if action == "port_conflicts":
                data = await make_graphql_request(QUERIES["port_conflicts"])
-                conflicts = _safe_get(data, "docker", "portConflicts", default=[])
-                return {"port_conflicts": list(conflicts) if isinstance(conflicts, list) else []}
+                conflicts = safe_get(data, "docker", "portConflicts", default=[])
+                return {"port_conflicts": conflicts}

            if action == "check_updates":
                data = await make_graphql_request(QUERIES["check_updates"])
-                statuses = _safe_get(data, "docker", "containerUpdateStatuses", default=[])
-                return {"update_statuses": list(statuses) if isinstance(statuses, list) else []}
+                statuses = safe_get(data, "docker", "containerUpdateStatuses", default=[])
+                return {"update_statuses": statuses}

-            # --- Mutations ---
+            # --- Mutations (strict matching: no fuzzy/substring) ---
            if action == "restart":
-                actual_id = await _resolve_container_id(container_id or "")
+                actual_id = await _resolve_container_id(container_id or "", strict=True)
                # Stop (idempotent: treat "already stopped" as success)
                stop_data = await make_graphql_request(
                    MUTATIONS["stop"],
@@ -330,7 +372,7 @@ def register_docker_tool(mcp: FastMCP) -> None:
                if start_data.get("idempotent_success"):
                    result = {}
                else:
-                    result = _safe_get(start_data, "docker", "start", default={})
+                    result = safe_get(start_data, "docker", "start", default={})
                response: dict[str, Any] = {
                    "success": True,
                    "action": "restart",
@@ -342,12 +384,12 @@ def register_docker_tool(mcp: FastMCP) -> None:

            if action == "update_all":
                data = await make_graphql_request(MUTATIONS["update_all"])
-                results = _safe_get(data, "docker", "updateAllContainers", default=[])
+                results = safe_get(data, "docker", "updateAllContainers", default=[])
                return {"success": True, "action": "update_all", "containers": results}

            # Single-container mutations
            if action in MUTATIONS:
-                actual_id = await _resolve_container_id(container_id or "")
+                actual_id = await _resolve_container_id(container_id or "", strict=True)
                op_context: dict[str, str] | None = (
                    {"operation": action} if action in ("start", "stop") else None
                )
@@ -382,10 +424,4 @@ def register_docker_tool(mcp: FastMCP) -> None:

            raise ToolError(f"Unhandled action '{action}' — this is a bug")

-        except ToolError:
-            raise
-        except Exception as e:
-            logger.error(f"Error in unraid_docker action={action}: {e}", exc_info=True)
-            raise ToolError(f"Failed to execute docker/{action}: {e!s}") from e
-
    logger.info("Docker tool registered successfully")