fix: correct subscription validation, byte-based log cap, partial autostart, URL scheme

- diagnostics.py: fix allow-list vs field name mismatch in subscription validator (_ALLOWED_SUBSCRIPTION_FIELDS now contains schema field names like "logFile", not operation names like "logFileSubscription", matching what _SUBSCRIPTION_NAME_PATTERN extracts); add _validate_subscription_query() called before any network I/O; replace chained .replace() URL building with build_ws_url(); gate connection_issues on current failure state via _analyze_subscription_status()
- manager.py: add _cap_log_content() with byte-count pre-check (len(value.encode("utf-8", errors="replace")) > _MAX_RESOURCE_DATA_BYTES) so multibyte UTF-8 content cannot bypass the 1 MB cap
- resources.py: add double-checked locking (_startup_lock) in ensure_subscriptions_started(); propagate exception from auto_start_all_subscriptions() via raise so _subscriptions_started=True is never set after a failed init
- utils.py: add build_ws_url() that raises ValueError on unknown/missing URL scheme instead of silently falling through; add _analyze_subscription_status() helper that gates connection_issues on current failure state

Resolves review threads PRRT_kwDOO6Hdxs50E50Y PRRT_kwDOO6Hdxs50E50a PRRT_kwDOO6Hdxs50E50c PRRT_kwDOO6Hdxs50E50d PRRT_kwDOO6Hdxs50E2iN PRRT_kwDOO6Hdxs50E2h8
2026-03-23 12:39:24 -07:00 · 2026-03-13 10:38:17 -04:00
parent 5b6a728f45
commit 9026faaa7c
4 changed files with 227 additions and 52 deletions
--- a/unraid_mcp/subscriptions/manager.py
+++ b/unraid_mcp/subscriptions/manager.py
@@ -20,6 +20,57 @@ from ..core.types import SubscriptionData
 from .utils import build_ws_ssl_context


+# Resource data size limits to prevent unbounded memory growth
+_MAX_RESOURCE_DATA_BYTES = 1_048_576  # 1 MiB (1024 * 1024); checked against UTF-8 encoded size
+_MAX_RESOURCE_DATA_LINES = 5_000  # most recent lines kept once 'content' exceeds the byte cap
+
+
+def _cap_log_content(data: dict[str, Any]) -> dict[str, Any]:
+    """Cap log content in subscription data to prevent unbounded memory growth.
+
+    Returns a new dict (nested dicts are rebuilt recursively) — does NOT mutate
+    the input. A str 'content' value whose UTF-8 size exceeds the byte limit is
+    cut to its most recent _MAX_RESOURCE_DATA_LINES lines and then, if still too
+    large, to the trailing whole lines that fit in _MAX_RESOURCE_DATA_BYTES.
+    Kept lines are re-joined with a newline; all other values pass through as-is.
+    """
+    result: dict[str, Any] = {}
+    for key, value in data.items():
+        if isinstance(value, dict):
+            result[key] = _cap_log_content(value)
+        elif (
+            key == "content"
+            and isinstance(value, str)
+            # Pre-check uses byte count so multibyte UTF-8 chars cannot bypass the cap
+            and len(value.encode("utf-8", errors="replace")) > _MAX_RESOURCE_DATA_BYTES
+        ):
+            lines = value.splitlines()
+            # Keep only the most recent lines (a no-op when already under the limit).
+            truncated = "\n".join(lines[-_MAX_RESOURCE_DATA_LINES:])
+            # Encode once and slice bytes instead of an O(n²) line-trim loop
+            encoded = truncated.encode("utf-8", errors="replace")
+            if len(encoded) > _MAX_RESOURCE_DATA_BYTES:
+                cut = len(encoded) - _MAX_RESOURCE_DATA_BYTES
+                # errors="ignore" drops a character that was split by the cut
+                truncated = encoded[cut:].decode("utf-8", errors="ignore")
+                # Drop the partial first line, unless the cut fell right after a
+                # newline: then the first line is complete and must be kept.
+                nl_pos = truncated.find("\n")
+                if nl_pos != -1 and encoded[cut - 1 : cut] != b"\n":
+                    truncated = truncated[nl_pos + 1 :]
+
+            # Count lines in the final text: byte trimming can drop more lines
+            kept_line_count = truncated.count("\n") + 1 if truncated else 0
+            logger.warning(
+                f"[RESOURCE] Capped log content from {len(lines)} to "
+                f"{kept_line_count} lines ({len(value)} -> {len(truncated)} chars)"
+            )
+            result[key] = truncated
+        else:
+            result[key] = value
+    return result
+
+
 class SubscriptionManager:
    """Manages GraphQL subscriptions and converts them to MCP resources."""