Remove unused MCP resources and update documentation

- Remove array_status, system_info, notifications_overview, and parity_status resources
- Keep only logs_stream resource (unraid://logs/stream) which is working properly
- Update README.md with current resource documentation and modern docker compose syntax
- Fix import path issues that were causing subscription errors
- Update environment configuration examples
- Clean up subscription manager to only include working log streaming

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Jacob Magar
2025-08-11 14:19:27 -04:00
parent f355511fe6
commit b00d78f408
29 changed files with 3641 additions and 2561 deletions

7
unraid_mcp/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
"""Unraid MCP Server Package.
A modular MCP (Model Context Protocol) server that provides tools to interact
with an Unraid server's GraphQL API.
"""
__version__ = "0.1.0"

View File

@@ -0,0 +1 @@
"""Configuration management for Unraid MCP Server."""

View File

@@ -0,0 +1,92 @@
"""Logging configuration for Unraid MCP Server.
This module sets up structured logging with console and rotating file handlers
that can be used consistently across all modules.
"""
import logging
import sys
from logging.handlers import RotatingFileHandler
from .settings import LOG_LEVEL_STR, LOG_FILE_PATH
def setup_logger(name: str = "UnraidMCPServer") -> logging.Logger:
    """Create a logger that writes to stdout and to a size-rotated log file.

    Handlers already attached to the named logger are removed first, so
    calling this again for the same name does not stack duplicate handlers.

    Args:
        name: Logger name (defaults to UnraidMCPServer)

    Returns:
        Configured logger instance
    """
    # An unrecognised level name falls back to INFO.
    level = getattr(logging, LOG_LEVEL_STR, logging.INFO)

    configured = logging.getLogger(name)
    configured.setLevel(level)
    # Do not hand records to ancestor loggers; this logger owns its output.
    configured.propagate = False
    configured.handlers.clear()

    # Console output: short format.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    configured.addHandler(stdout_handler)

    # File output: rotate at 5MB, keep 3 backups, include the call site.
    rotating_handler = RotatingFileHandler(
        LOG_FILE_PATH,
        maxBytes=5 * 1024 * 1024,
        backupCount=3,
        encoding='utf-8',
    )
    rotating_handler.setLevel(level)
    rotating_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(module)s - %(funcName)s - %(lineno)d - %(message)s'
        )
    )
    configured.addHandler(rotating_handler)

    return configured
def log_configuration_status(logger: logging.Logger) -> None:
    """Write the startup configuration summary to the given logger.

    The API key itself is never logged; only whether it is present.

    Args:
        logger: Logger instance to use for logging
    """
    from .settings import get_config_summary

    logger.info(f"Logging initialized (console and file: {LOG_FILE_PATH}).")

    summary = get_config_summary()

    # Required credentials: report presence, warn when absent.
    if not summary['api_url_configured']:
        logger.warning("UNRAID_API_URL not found in environment or .env file.")
    else:
        logger.info(f"UNRAID_API_URL loaded: {summary['api_url_preview']}")

    if not summary['api_key_configured']:
        logger.warning("UNRAID_API_KEY not found in environment or .env file.")
    else:
        logger.info("UNRAID_API_KEY loaded: ****")  # Don't log the key itself

    # Plain settings, reported in a fixed order.
    reported_settings = (
        ("UNRAID_MCP_PORT", 'server_port'),
        ("UNRAID_MCP_HOST", 'server_host'),
        ("UNRAID_MCP_TRANSPORT", 'transport'),
        ("UNRAID_MCP_LOG_LEVEL", 'log_level'),
    )
    for env_name, summary_key in reported_settings:
        logger.info(f"{env_name} set to: {summary[summary_key]}")

    if not summary['config_valid']:
        logger.error(f"Missing required configuration: {summary['missing_config']}")
# Global logger instance - modules can import this directly.
# It is created at import time, so importing this module attaches the
# console and rotating-file handlers configured by setup_logger().
logger = setup_logger()

View File

@@ -0,0 +1,104 @@
"""Configuration management for Unraid MCP Server.
This module handles loading environment variables from multiple .env locations
and provides all configuration constants used throughout the application.
"""
import os
from pathlib import Path
from typing import Union
from dotenv import load_dotenv
# Directory layout, derived from this module's own location.
_THIS_FILE = Path(__file__)
SCRIPT_DIR = _THIS_FILE.parents[0]      # /home/user/code/unraid-mcp/unraid_mcp/config/
UNRAID_MCP_DIR = _THIS_FILE.parents[1]  # /home/user/code/unraid-mcp/unraid_mcp/
PROJECT_ROOT = _THIS_FILE.parents[2]    # /home/user/code/unraid-mcp/

# Candidate .env files in priority order. Only the FIRST one that exists is
# loaded; later candidates are ignored once a match is found.
dotenv_paths = [
    Path('/app/.env.local'),          # Container mount point
    PROJECT_ROOT.joinpath('.env.local'),  # Project root .env.local
    PROJECT_ROOT.joinpath('.env'),        # Project root .env
    UNRAID_MCP_DIR.joinpath('.env'),      # Local .env in unraid_mcp/
]
for dotenv_path in dotenv_paths:
    if not dotenv_path.exists():
        continue
    load_dotenv(dotenv_path=dotenv_path)
    break
# Core API Configuration (both are None when unset).
UNRAID_API_URL = os.getenv("UNRAID_API_URL")
UNRAID_API_KEY = os.getenv("UNRAID_API_KEY")

# Server Configuration
UNRAID_MCP_PORT = int(os.getenv("UNRAID_MCP_PORT", "6970"))
UNRAID_MCP_HOST = os.getenv("UNRAID_MCP_HOST", "0.0.0.0")
UNRAID_MCP_TRANSPORT = os.getenv("UNRAID_MCP_TRANSPORT", "streamable-http").lower()

# SSL Configuration
# UNRAID_VERIFY_SSL is a boolean keyword (case-insensitive) or a path to a CA
# bundle. Only the keyword comparison is case-folded: the stored path keeps
# its original case, because filesystem paths are case-sensitive.
raw_verify_ssl = os.getenv("UNRAID_VERIFY_SSL", "true").strip()
UNRAID_VERIFY_SSL: Union[bool, str]
if raw_verify_ssl.lower() in ("false", "0", "no"):
    UNRAID_VERIFY_SSL = False
elif raw_verify_ssl.lower() in ("true", "1", "yes", ""):
    # An empty value (e.g. an unset variable passed through by compose) means
    # "use the default", i.e. verify certificates.
    UNRAID_VERIFY_SSL = True
else:  # Path to CA bundle
    UNRAID_VERIFY_SSL = raw_verify_ssl
# Logging Configuration
LOG_LEVEL_STR = os.environ.get('UNRAID_MCP_LOG_LEVEL', 'INFO').upper()
LOG_FILE_NAME = os.environ.get("UNRAID_MCP_LOG_FILE", "unraid-mcp.log")
LOGS_DIR = PROJECT_ROOT.joinpath("logs")
LOG_FILE_PATH = LOGS_DIR.joinpath(LOG_FILE_NAME)

# The logs directory is created at import time so file handlers can open
# LOG_FILE_PATH without further setup.
LOGS_DIR.mkdir(parents=True, exist_ok=True)

# HTTP Client Configuration (values are seconds)
TIMEOUT_CONFIG = dict(
    default=30,
    disk_operations=90,  # Longer timeout for SMART data queries
)
def validate_required_config() -> tuple[bool, list[str]]:
    """Validate that required configuration is present.

    A variable counts as missing when it is unset or empty.

    Returns:
        A ``(is_valid, missing)`` pair: ``is_valid`` is True when every
        required variable has a value, and ``missing`` lists the names of
        the variables that do not (empty when valid).
    """
    required_vars = [
        ("UNRAID_API_URL", UNRAID_API_URL),
        ("UNRAID_API_KEY", UNRAID_API_KEY),
    ]
    missing = [name for name, value in required_vars if not value]
    return not missing, missing
def get_config_summary() -> dict:
    """Describe the current configuration in a form that is safe to log.

    The API key is reported only as present/absent, and the API URL is
    reduced to its first 20 characters followed by '...'.

    Returns:
        dict: Configuration summary with sensitive data redacted.
    """
    is_valid, missing = validate_required_config()

    if UNRAID_API_URL:
        url_preview = UNRAID_API_URL[:20] + '...'
    else:
        url_preview = None

    summary = {}
    summary['api_url_configured'] = bool(UNRAID_API_URL)
    summary['api_url_preview'] = url_preview
    summary['api_key_configured'] = bool(UNRAID_API_KEY)
    summary['server_host'] = UNRAID_MCP_HOST
    summary['server_port'] = UNRAID_MCP_PORT
    summary['transport'] = UNRAID_MCP_TRANSPORT
    summary['ssl_verify'] = UNRAID_VERIFY_SSL
    summary['log_level'] = LOG_LEVEL_STR
    summary['log_file'] = str(LOG_FILE_PATH)
    summary['config_valid'] = is_valid
    # The list of missing names is only reported when validation failed.
    summary['missing_config'] = None if is_valid else missing
    return summary

View File

@@ -0,0 +1 @@
"""Core infrastructure components for Unraid MCP Server."""

147
unraid_mcp/core/client.py Normal file
View File

@@ -0,0 +1,147 @@
"""GraphQL client for Unraid API communication.
This module provides the HTTP client interface for making GraphQL requests
to the Unraid API with proper timeout handling and error management.
"""
import json
from typing import Any
import httpx
from ..config.logging import logger
from ..config.settings import TIMEOUT_CONFIG, UNRAID_API_KEY, UNRAID_API_URL, UNRAID_VERIFY_SSL
from ..core.exceptions import ToolError
# HTTP timeout configuration
# Both read timeouts come from TIMEOUT_CONFIG so that settings.py is the
# single place to tune them; the 10s overall and 5s connect values are fixed.
DEFAULT_TIMEOUT = httpx.Timeout(10.0, read=TIMEOUT_CONFIG['default'], connect=5.0)
DISK_TIMEOUT = httpx.Timeout(10.0, read=TIMEOUT_CONFIG['disk_operations'], connect=5.0)
def is_idempotent_error(error_message: str, operation: str) -> bool:
    """Decide whether a GraphQL error only says the target state was already reached.

    Matching is a case-insensitive substring search against a fixed list of
    phrases per operation. Operations other than 'start' and 'stop' never match.

    Args:
        error_message: The error message from GraphQL API
        operation: The operation being performed (e.g., 'start', 'stop')

    Returns:
        True if this is an idempotent error that should be treated as success
    """
    normalized = error_message.lower()

    # Docker container operation patterns
    phrases_by_operation = {
        'start': (
            'already started',
            'container already running',
            'http code 304',
        ),
        'stop': (
            'already stopped',
            'container already stopped',
            'container not running',
            'http code 304',
        ),
    }

    phrases = phrases_by_operation.get(operation, ())
    return any(phrase in normalized for phrase in phrases)
async def make_graphql_request(
    query: str,
    variables: dict[str, Any] | None = None,
    custom_timeout: httpx.Timeout | None = None,
    operation_context: dict[str, str] | None = None
) -> dict[str, Any]:
    """Make GraphQL requests to the Unraid API.

    Args:
        query: GraphQL query string
        variables: Optional query variables
        custom_timeout: Optional custom timeout configuration
        operation_context: Optional context for operation-specific error handling
            Should contain 'operation' key (e.g., 'start', 'stop')

    Returns:
        Dict containing the GraphQL response data. When the API reports an
        error that is_idempotent_error() accepts for the given operation, a
        synthetic dict with ``idempotent_success: True`` is returned instead.
        A response whose ``data`` is missing or null yields an empty dict.

    Raises:
        ToolError: For missing configuration, HTTP errors, network errors,
            undecodable responses, or non-idempotent GraphQL errors
    """
    if not UNRAID_API_URL:
        raise ToolError("UNRAID_API_URL not configured")

    if not UNRAID_API_KEY:
        raise ToolError("UNRAID_API_KEY not configured")

    headers = {
        "Content-Type": "application/json",
        "X-API-Key": UNRAID_API_KEY,
        "User-Agent": "UnraidMCPServer/0.1.0"  # Custom user-agent
    }

    payload: dict[str, Any] = {"query": query}
    if variables:
        payload["variables"] = variables

    logger.debug(f"Making GraphQL request to {UNRAID_API_URL}:")
    logger.debug(f"Query: {query[:200]}{'...' if len(query) > 200 else ''}")  # Log truncated query
    if variables:
        logger.debug(f"Variables: {variables}")

    current_timeout = custom_timeout if custom_timeout is not None else DEFAULT_TIMEOUT

    try:
        async with httpx.AsyncClient(timeout=current_timeout, verify=UNRAID_VERIFY_SSL) as client:
            response = await client.post(UNRAID_API_URL, json=payload, headers=headers)
            response.raise_for_status()  # Raise an exception for HTTP error codes 4xx/5xx

            response_data = response.json()
            if "errors" in response_data and response_data["errors"]:
                # Error entries are normally objects with a "message" key;
                # anything else is stringified instead of crashing on .get().
                error_details = "; ".join(
                    err.get("message", str(err)) if isinstance(err, dict) else str(err)
                    for err in response_data["errors"]
                )

                # Check if this is an idempotent error that should be treated as success
                if operation_context and operation_context.get('operation'):
                    operation = operation_context['operation']
                    if is_idempotent_error(error_details, operation):
                        logger.warning(f"Idempotent operation '{operation}' - treating as success: {error_details}")
                        # Return a success response with the current state information
                        return {
                            "idempotent_success": True,
                            "operation": operation,
                            "message": error_details,
                            "original_errors": response_data["errors"]
                        }

                logger.error(f"GraphQL API returned errors: {response_data['errors']}")
                # Use ToolError for GraphQL errors to provide better feedback to LLM
                raise ToolError(f"GraphQL API error: {error_details}")

            logger.debug("GraphQL request successful.")
            # `data` may be present but null; callers are promised a dict.
            return response_data.get("data") or {}

    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error occurred: {e.response.status_code} - {e.response.text}")
        raise ToolError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
    except httpx.RequestError as e:
        logger.error(f"Request error occurred: {e}")
        raise ToolError(f"Network connection error: {str(e)}") from e
    except json.JSONDecodeError as e:
        logger.error(f"Failed to decode JSON response: {e}")
        raise ToolError(f"Invalid JSON response from Unraid API: {str(e)}") from e
def get_timeout_for_operation(operation_type: str = "default") -> httpx.Timeout:
    """Pick the timeout configuration for a kind of operation.

    Only "disk_operations" selects the longer disk timeout; every other
    value, including unknown ones, gets the default timeout.

    Args:
        operation_type: Type of operation ('default', 'disk_operations')

    Returns:
        httpx.Timeout configuration appropriate for the operation
    """
    return DISK_TIMEOUT if operation_type == "disk_operations" else DEFAULT_TIMEOUT

View File

@@ -0,0 +1,48 @@
"""Custom exceptions for Unraid MCP Server.
This module defines custom exception classes for consistent error handling
throughout the application, with proper integration to FastMCP's error system.
"""
from fastmcp.exceptions import ToolError as FastMCPToolError
class ToolError(FastMCPToolError):
    """Base user-facing error for the Unraid MCP server.

    Raised throughout the application for failures that should be shown to
    the user/LLM in a friendly way. It subclasses FastMCP's ToolError so the
    MCP layer handles it like its own tool errors, and it is the common base
    of the more specific exceptions defined in this module.
    """
class ConfigurationError(ToolError):
    """Signals a problem with the server's configuration."""
class UnraidAPIError(ToolError):
    """Signals that the Unraid API returned an error or could not be reached."""
class SubscriptionError(ToolError):
    """Signals a failure in the WebSocket subscription system."""
class ValidationError(ToolError):
    """Signals that input validation failed."""
class IdempotentOperationError(ToolError):
    """Signals that the requested operation had nothing to do.

    Intended for internal use: the target was already in the desired state,
    so handlers should normally translate this into a success response
    instead of surfacing it to the user as an error.
    """

43
unraid_mcp/core/types.py Normal file
View File

@@ -0,0 +1,43 @@
"""Shared data types for Unraid MCP Server.
This module defines data classes and type definitions used across
multiple modules for consistent data handling.
"""
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, Optional, Union
@dataclass
class SubscriptionData:
    """Latest payload received for a subscription, plus bookkeeping metadata."""

    # Payload as a plain dict.
    data: Dict[str, Any]
    # When `data` was last replaced.
    last_updated: datetime
    # Identifier of the subscription that produced the payload.
    subscription_type: str
@dataclass
class SystemHealth:
    """Snapshot of a system health check."""

    # Overall verdict.
    is_healthy: bool
    # Problems found by the check.
    issues: list[str]
    # Non-fatal findings.
    warnings: list[str]
    # When the check ran.
    last_checked: datetime
    # Status string per component name.
    component_status: Dict[str, str]
@dataclass
class APIResponse:
    """Uniform envelope for an API call result."""

    # Whether the call succeeded (the only required field).
    success: bool
    # Response payload, if any.
    data: Optional[Dict[str, Any]] = None
    # Error description, if any.
    error: Optional[str] = None
    # Extra information about the call, if any.
    metadata: Optional[Dict[str, Any]] = None
# Type aliases for common data structures
# A single scalar configuration value (or None when unset).
ConfigValue = Union[str, int, bool, float, None]
# Mapping of configuration names to scalar values.
ConfigDict = Dict[str, ConfigValue]
# Variables dict sent alongside a GraphQL query.
GraphQLVariables = Dict[str, Any]
# Flat health-report mapping; values are strings, booleans, ints or lists.
HealthStatus = Dict[str, Union[str, bool, int, list]]

22
unraid_mcp/main.py Normal file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Unraid MCP Server - Entry Point.
This is the main entry point for the Unraid MCP Server. It imports and starts
the modular server implementation from unraid_mcp.server.
"""
def main():
    """Start the Unraid MCP Server and report how it stopped.

    The server module is imported inside the try block, so a failure while
    importing it is reported the same way as a failure while running.
    Ctrl-C ends the process quietly; any other exception is printed and
    then re-raised.
    """
    try:
        from .server import run_server
        run_server()
    except KeyboardInterrupt:
        print("\nServer stopped by user")
    except Exception as exc:
        print(f"Server failed to start: {exc}")
        raise


if __name__ == "__main__":
    main()

141
unraid_mcp/server.py Normal file
View File

@@ -0,0 +1,141 @@
"""Modular Unraid MCP Server.
This is the main server implementation using the modular architecture with
separate modules for configuration, core functionality, subscriptions, and tools.
"""
import sys
from fastmcp import FastMCP
from .config.logging import logger
from .config.settings import (
UNRAID_API_KEY,
UNRAID_API_URL,
UNRAID_MCP_HOST,
UNRAID_MCP_PORT,
UNRAID_MCP_TRANSPORT,
)
from .subscriptions.diagnostics import register_diagnostic_tools
from .subscriptions.manager import SubscriptionManager
from .subscriptions.resources import register_subscription_resources
from .tools.docker import register_docker_tools
from .tools.health import register_health_tools
from .tools.rclone import register_rclone_tools
from .tools.storage import register_storage_tools
from .tools.system import register_system_tools
from .tools.virtualization import register_vm_tools
# Initialize FastMCP instance
# Every register_* function called from register_all_modules() attaches its
# tools/resources to this single instance.
mcp = FastMCP(
    name="Unraid MCP Server",
    instructions="Provides tools to interact with an Unraid server's GraphQL API.",
    version="0.1.0",
)
# Initialize subscription manager
# NOTE(review): subscriptions/diagnostics.py imports a `subscription_manager`
# from .manager, while this module constructs its own SubscriptionManager().
# Confirm whether two separate manager instances are intended.
subscription_manager = SubscriptionManager()
async def autostart_subscriptions():
    """Ask the module-level SubscriptionManager to start its auto-start subscriptions.

    Failures are logged with a traceback and swallowed, so callers never see
    an exception from this coroutine.
    """
    logger.info("[AUTOSTART] Initiating subscription auto-start process...")

    try:
        # Use the SubscriptionManager auto-start method
        await subscription_manager.auto_start_all_subscriptions()
    except Exception as exc:
        logger.error(f"[AUTOSTART] Failed during auto-start process: {exc}", exc_info=True)
    else:
        logger.info("[AUTOSTART] Auto-start process completed successfully")
def register_all_modules():
    """Attach every tool and resource group to the shared MCP instance.

    Groups are registered in a fixed order (subscription resources first),
    and a confirmation line is logged after each one. The first failure is
    logged with a traceback and re-raised, leaving later groups unregistered.
    """
    # (registration function, confirmation message), in registration order.
    registration_steps = (
        (register_subscription_resources, "📊 Subscription resources registered"),
        (register_diagnostic_tools, "🔧 Diagnostic tools registered"),
        (register_system_tools, "🖥️ System tools registered"),
        (register_docker_tools, "🐳 Docker tools registered"),
        (register_vm_tools, "💻 Virtualization tools registered"),
        (register_storage_tools, "💾 Storage tools registered"),
        (register_health_tools, "🏥 Health tools registered"),
        (register_rclone_tools, "☁️ RClone tools registered"),
    )

    try:
        for register, confirmation in registration_steps:
            register(mcp)
            logger.info(confirmation)

        logger.info("🎯 All modules registered successfully - Server ready!")
    except Exception as exc:
        logger.error(f"❌ Failed to register modules: {exc}", exc_info=True)
        raise
def run_server():
    """Log the configuration, register all modules, and run the MCP server.

    The transport is taken from UNRAID_MCP_TRANSPORT. An unsupported value,
    or any exception raised while the server runs, is logged and ends the
    process with exit status 1. This function does not itself start any
    subscriptions.
    """
    # Log configuration
    if not UNRAID_API_URL:
        logger.warning("UNRAID_API_URL not found in environment or .env file.")
    else:
        logger.info(f"UNRAID_API_URL loaded: {UNRAID_API_URL[:20]}...")

    if not UNRAID_API_KEY:
        logger.warning("UNRAID_API_KEY not found in environment or .env file.")
    else:
        logger.info("UNRAID_API_KEY loaded: ****")

    logger.info(f"UNRAID_MCP_PORT set to: {UNRAID_MCP_PORT}")
    logger.info(f"UNRAID_MCP_HOST set to: {UNRAID_MCP_HOST}")
    logger.info(f"UNRAID_MCP_TRANSPORT set to: {UNRAID_MCP_TRANSPORT}")

    # Register all modules
    register_all_modules()

    logger.info(f"🚀 Starting Unraid MCP Server on {UNRAID_MCP_HOST}:{UNRAID_MCP_PORT} using {UNRAID_MCP_TRANSPORT} transport...")

    try:
        if UNRAID_MCP_TRANSPORT in ("streamable-http", "sse"):
            # Both HTTP transports are served on the same host, port and path.
            if UNRAID_MCP_TRANSPORT == "sse":
                # Deprecated SSE transport - log warning
                logger.warning("SSE transport is deprecated and may be removed in a future version. Consider switching to 'streamable-http'.")
            mcp.run(
                transport=UNRAID_MCP_TRANSPORT,
                host=UNRAID_MCP_HOST,
                port=UNRAID_MCP_PORT,
                path="/mcp",
            )
        elif UNRAID_MCP_TRANSPORT == "stdio":
            mcp.run()  # Defaults to stdio
        else:
            logger.error(f"Unsupported MCP_TRANSPORT: {UNRAID_MCP_TRANSPORT}. Choose 'streamable-http' (recommended), 'sse' (deprecated), or 'stdio'.")
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept this exit.
            sys.exit(1)
    except Exception as exc:
        logger.critical(f"❌ Failed to start Unraid MCP server: {exc}", exc_info=True)
        sys.exit(1)


if __name__ == "__main__":
    run_server()

View File

@@ -0,0 +1 @@
"""WebSocket subscription system for real-time Unraid data."""

View File

@@ -0,0 +1,206 @@
"""Subscription system troubleshooting and monitoring.
This module provides diagnostic tools for WebSocket connection testing,
subscription system monitoring, and detailed status reporting for
development and debugging purposes.
"""
import asyncio
import json
from datetime import datetime
from typing import Any, Dict
import websockets
from fastmcp import FastMCP
from ..config.logging import logger
from ..config.settings import UNRAID_API_URL, UNRAID_API_KEY, UNRAID_VERIFY_SSL
from ..core.exceptions import ToolError
from .manager import subscription_manager
from .resources import ensure_subscriptions_started
def _build_websocket_url(api_url: str) -> str:
    """Convert the HTTP(S) API URL into the GraphQL WebSocket endpoint URL."""
    if api_url.startswith('https://'):
        ws_url = 'wss://' + api_url[len('https://'):]
    elif api_url.startswith('http://'):
        ws_url = 'ws://' + api_url[len('http://'):]
    else:
        ws_url = api_url
    # The configured URL may or may not already point at /graphql.
    if not ws_url.endswith('/graphql'):
        ws_url = ws_url.rstrip('/') + '/graphql'
    return ws_url


def _websocket_ssl_kwargs(ws_url: str, verify: Any) -> Dict[str, Any]:
    """Translate the UNRAID_VERIFY_SSL setting into websockets.connect() kwargs."""
    import ssl

    # websockets rejects an `ssl` argument for plain ws:// connections.
    if not ws_url.startswith('wss://'):
        return {}
    if verify is False:
        insecure_context = ssl.create_default_context()
        insecure_context.check_hostname = False
        insecure_context.verify_mode = ssl.CERT_NONE
        return {"ssl": insecure_context}
    if isinstance(verify, str):
        # A string is a path to a CA bundle.
        return {"ssl": ssl.create_default_context(cafile=verify)}
    # verify is True: let websockets use its default TLS settings.
    return {}


def register_diagnostic_tools(mcp: FastMCP):
    """Register diagnostic tools with the FastMCP instance.

    Two tools are registered: `test_subscription_query`, which opens a
    one-off WebSocket connection to try a subscription query, and
    `diagnose_subscriptions`, which reports the subscription manager's state.

    Args:
        mcp: FastMCP instance to register tools with
    """

    @mcp.tool()
    async def test_subscription_query(subscription_query: str) -> Dict[str, Any]:
        """
        Test a GraphQL subscription query directly to debug schema issues.
        Use this to find working subscription field names and structure.

        Args:
            subscription_query: The GraphQL subscription query to test

        Returns:
            Dict containing test results and response data
        """
        try:
            logger.info(f"[TEST_SUBSCRIPTION] Testing query: {subscription_query}")

            if not UNRAID_API_URL:
                return {
                    "error": "UNRAID_API_URL not configured",
                    "query_tested": subscription_query
                }

            # Build WebSocket URL (same rules as diagnose_subscriptions)
            ws_url = _build_websocket_url(UNRAID_API_URL)

            # Test connection
            async with websockets.connect(
                ws_url,
                subprotocols=["graphql-transport-ws", "graphql-ws"],
                ping_interval=30,
                ping_timeout=10,
                **_websocket_ssl_kwargs(ws_url, UNRAID_VERIFY_SSL)
            ) as websocket:

                # Send connection init
                await websocket.send(json.dumps({
                    "type": "connection_init",
                    "payload": {
                        "x-api-key": UNRAID_API_KEY,
                        "Authorization": f"Bearer {UNRAID_API_KEY}"
                    }
                }))

                # Wait for ack; bounded so a silent server cannot hang the tool
                response = await asyncio.wait_for(websocket.recv(), timeout=10.0)
                init_response = json.loads(response)

                if init_response.get("type") != "connection_ack":
                    return {"error": f"Connection failed: {init_response}"}

                # The message type depends on the negotiated subprotocol:
                # graphql-transport-ws uses "subscribe", legacy graphql-ws "start".
                if websocket.subprotocol == "graphql-transport-ws":
                    start_type = "subscribe"
                else:
                    start_type = "start"

                # Send subscription
                await websocket.send(json.dumps({
                    "id": "test",
                    "type": start_type,
                    "payload": {"query": subscription_query}
                }))

                # Wait for response with timeout
                try:
                    response = await asyncio.wait_for(websocket.recv(), timeout=5.0)
                    result = json.loads(response)

                    logger.info(f"[TEST_SUBSCRIPTION] Response: {result}")
                    return {
                        "success": True,
                        "response": result,
                        "query_tested": subscription_query
                    }

                except asyncio.TimeoutError:
                    return {
                        "success": True,
                        "response": "No immediate response (subscriptions may only send data on changes)",
                        "query_tested": subscription_query,
                        "note": "Connection successful, subscription may be waiting for events"
                    }

        except Exception as e:
            logger.error(f"[TEST_SUBSCRIPTION] Error: {e}", exc_info=True)
            return {
                "error": str(e),
                "query_tested": subscription_query
            }

    @mcp.tool()
    async def diagnose_subscriptions() -> Dict[str, Any]:
        """
        Comprehensive diagnostic tool for subscription system.
        Shows detailed status, connection states, errors, and troubleshooting info.

        Returns:
            Dict containing comprehensive subscription system diagnostics
        """
        # Ensure subscriptions are started before diagnosing
        await ensure_subscriptions_started()

        try:
            logger.info("[DIAGNOSTIC] Running subscription diagnostics...")

            # Get comprehensive status
            status = subscription_manager.get_subscription_status()

            # Add environment info
            diagnostic_info = {
                "timestamp": datetime.now().isoformat(),
                "environment": {
                    "auto_start_enabled": subscription_manager.auto_start_enabled,
                    "max_reconnect_attempts": subscription_manager.max_reconnect_attempts,
                    "unraid_api_url": UNRAID_API_URL[:50] + "..." if UNRAID_API_URL else None,
                    "api_key_configured": bool(UNRAID_API_KEY),
                    "websocket_url": None
                },
                "subscriptions": status,
                "summary": {
                    "total_configured": len(subscription_manager.subscription_configs),
                    "auto_start_count": sum(1 for s in subscription_manager.subscription_configs.values() if s.get("auto_start")),
                    "active_count": len(subscription_manager.active_subscriptions),
                    "with_data": len(subscription_manager.resource_data),
                    "in_error_state": 0,
                    "connection_issues": []
                }
            }

            # Calculate WebSocket URL
            if UNRAID_API_URL:
                diagnostic_info["environment"]["websocket_url"] = _build_websocket_url(UNRAID_API_URL)

            # Analyze issues
            for sub_name, sub_status in status.items():
                runtime = sub_status.get("runtime", {})
                connection_state = runtime.get("connection_state", "unknown")

                if connection_state in ["error", "auth_failed", "timeout", "max_retries_exceeded"]:
                    diagnostic_info["summary"]["in_error_state"] += 1

                if runtime.get("last_error"):
                    diagnostic_info["summary"]["connection_issues"].append({
                        "subscription": sub_name,
                        "state": connection_state,
                        "error": runtime["last_error"]
                    })

            # Add troubleshooting recommendations
            recommendations = []

            if not diagnostic_info["environment"]["api_key_configured"]:
                recommendations.append("CRITICAL: No API key configured. Set UNRAID_API_KEY environment variable.")

            if diagnostic_info["summary"]["in_error_state"] > 0:
                recommendations.append("Some subscriptions are in error state. Check 'connection_issues' for details.")

            if diagnostic_info["summary"]["with_data"] == 0:
                recommendations.append("No subscriptions have received data yet. Check WebSocket connectivity and authentication.")

            if diagnostic_info["summary"]["active_count"] < diagnostic_info["summary"]["auto_start_count"]:
                recommendations.append("Not all auto-start subscriptions are active. Check server startup logs.")

            diagnostic_info["troubleshooting"] = {
                "recommendations": recommendations,
                "log_commands": [
                    "Check server logs for [WEBSOCKET:*], [AUTH:*], [SUBSCRIPTION:*] prefixed messages",
                    "Look for connection timeout or authentication errors",
                    "Verify Unraid API URL is accessible and supports GraphQL subscriptions"
                ],
                "next_steps": [
                    "If authentication fails: Verify API key has correct permissions",
                    "If connection fails: Check network connectivity to Unraid server",
                    "If no data received: Enable DEBUG logging to see detailed protocol messages"
                ]
            }

            logger.info(f"[DIAGNOSTIC] Completed. Active: {diagnostic_info['summary']['active_count']}, With data: {diagnostic_info['summary']['with_data']}, Errors: {diagnostic_info['summary']['in_error_state']}")
            return diagnostic_info

        except Exception as e:
            logger.error(f"[DIAGNOSTIC] Failed to generate diagnostics: {e}", exc_info=True)
            raise ToolError(f"Failed to generate diagnostics: {str(e)}") from e

    logger.info("Subscription diagnostic tools registered successfully")

View File

@@ -0,0 +1,392 @@
"""WebSocket subscription manager for real-time Unraid data.
This module manages GraphQL subscriptions over WebSocket connections,
providing real-time data streaming for MCP resources with comprehensive
error handling, reconnection logic, and authentication.
"""
import asyncio
import json
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
import websockets
from ..config.logging import logger
from ..config.settings import UNRAID_API_URL, UNRAID_API_KEY
from ..core.types import SubscriptionData
class SubscriptionManager:
"""Manages GraphQL subscriptions and converts them to MCP resources."""
def __init__(self):
    """Set up empty runtime state and the table of known subscriptions.

    Two environment variables are read here:
    UNRAID_AUTO_START_SUBSCRIPTIONS (enabled only when the value is "true",
    case-insensitive; default "true") and UNRAID_MAX_RECONNECT_ATTEMPTS
    (integer, default 10).
    """
    # Runtime state, keyed by subscription name.
    self.active_subscriptions: Dict[str, asyncio.Task] = {}
    self.resource_data: Dict[str, SubscriptionData] = {}
    self.websocket: Optional[websockets.WebSocketServerProtocol] = None
    self.subscription_lock = asyncio.Lock()

    # Configuration
    auto_start_setting = os.getenv("UNRAID_AUTO_START_SUBSCRIPTIONS", "true")
    self.auto_start_enabled = auto_start_setting.lower() == "true"
    self.reconnect_attempts: Dict[str, int] = {}
    max_attempts_setting = os.getenv("UNRAID_MAX_RECONNECT_ATTEMPTS", "10")
    self.max_reconnect_attempts = int(max_attempts_setting)
    self.connection_states: Dict[str, str] = {}  # Track connection state per subscription
    self.last_error: Dict[str, str] = {}  # Track last error per subscription

    # Define subscription configurations
    log_file_query = """
subscription LogFileSubscription($path: String!) {
logFile(path: $path) {
path
content
totalLines
}
}
"""
    self.subscription_configs = {
        "logFileSubscription": {
            "query": log_file_query,
            "resource": "unraid://logs/stream",
            "description": "Real-time log file streaming",
            "auto_start": False,  # Started manually with path parameter
        },
    }

    logger.info(f"[SUBSCRIPTION_MANAGER] Initialized with auto_start={self.auto_start_enabled}, max_reconnects={self.max_reconnect_attempts}")
    logger.debug(f"[SUBSCRIPTION_MANAGER] Available subscriptions: {list(self.subscription_configs.keys())}")
async def auto_start_all_subscriptions(self):
    """Start every configured subscription whose config has auto_start set.

    Does nothing when auto-start is disabled. A subscription that fails to
    start is logged and recorded in `last_error`; the remaining
    subscriptions are still attempted.
    """
    if not self.auto_start_enabled:
        logger.info("[SUBSCRIPTION_MANAGER] Auto-start disabled")
        return

    logger.info("[SUBSCRIPTION_MANAGER] Starting auto-start process...")
    started = 0

    for sub_name, sub_config in self.subscription_configs.items():
        if not sub_config.get("auto_start", False):
            continue
        try:
            logger.info(f"[SUBSCRIPTION_MANAGER] Auto-starting subscription: {sub_name}")
            await self.start_subscription(sub_name, sub_config["query"])
            started += 1
        except Exception as exc:
            logger.error(f"[SUBSCRIPTION_MANAGER] Failed to auto-start {sub_name}: {exc}")
            self.last_error[sub_name] = str(exc)

    logger.info(f"[SUBSCRIPTION_MANAGER] Auto-start completed. Started {started} subscriptions")
async def start_subscription(self, subscription_name: str, query: str, variables: Optional[Dict[str, Any]] = None):
    """Start a GraphQL subscription and maintain it as a resource.

    A background task running `_subscription_loop` is created and stored in
    `active_subscriptions`. If a task for this name is still running, the
    call is a no-op. A task that has already finished (for example after
    the loop gave up reconnecting) is replaced, so the subscription can be
    restarted without calling `stop_subscription` first.

    Args:
        subscription_name: Key used for all per-subscription state
        query: GraphQL subscription query
        variables: Optional variables for the query

    Raises:
        Exception: Whatever task creation raises, after recording it in
            `connection_states` and `last_error`
    """
    logger.info(f"[SUBSCRIPTION:{subscription_name}] Starting subscription...")

    # The check and the registration happen under the same lock so two
    # concurrent callers cannot both create a task for the same name.
    async with self.subscription_lock:
        existing_task = self.active_subscriptions.get(subscription_name)
        if existing_task is not None:
            if not existing_task.done():
                logger.warning(f"[SUBSCRIPTION:{subscription_name}] Subscription already active, skipping")
                return
            logger.debug(f"[SUBSCRIPTION:{subscription_name}] Replacing finished subscription task")

        # Reset connection tracking
        self.reconnect_attempts[subscription_name] = 0
        self.connection_states[subscription_name] = "starting"

        try:
            task = asyncio.create_task(self._subscription_loop(subscription_name, query, variables or {}))
            self.active_subscriptions[subscription_name] = task
            logger.info(f"[SUBSCRIPTION:{subscription_name}] Subscription task created and started")
            self.connection_states[subscription_name] = "active"
        except Exception as e:
            logger.error(f"[SUBSCRIPTION:{subscription_name}] Failed to start subscription task: {e}")
            self.connection_states[subscription_name] = "failed"
            self.last_error[subscription_name] = str(e)
            raise
async def stop_subscription(self, subscription_name: str):
    """Stop a specific subscription.

    The subscription's task is cancelled and awaited, then removed from
    `active_subscriptions` and its state set to "stopped". Removal happens
    even if the task had already ended with an error, so a failed
    subscription can always be cleared. Stopping an unknown subscription
    only logs a warning.

    Args:
        subscription_name: Name of the subscription to stop
    """
    logger.info(f"[SUBSCRIPTION:{subscription_name}] Stopping subscription...")

    async with self.subscription_lock:
        task = self.active_subscriptions.get(subscription_name)
        if task is None:
            logger.warning(f"[SUBSCRIPTION:{subscription_name}] No active subscription to stop")
            return

        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            logger.debug(f"[SUBSCRIPTION:{subscription_name}] Task cancelled successfully")
        except Exception as e:
            # The task had already failed on its own; awaiting it re-raises
            # that error, which must not prevent the cleanup below.
            logger.warning(f"[SUBSCRIPTION:{subscription_name}] Task had already failed: {e}")
        finally:
            self.active_subscriptions.pop(subscription_name, None)

        self.connection_states[subscription_name] = "stopped"
        logger.info(f"[SUBSCRIPTION:{subscription_name}] Subscription stopped")
async def _subscription_loop(self, subscription_name: str, query: str, variables: Dict[str, Any]):
    """Run and maintain a single GraphQL subscription over a WebSocket.

    Each pass of the outer loop is one connection attempt: derive the WebSocket
    URL from UNRAID_API_URL, connect, send ``connection_init`` (carrying the API
    key when one is configured), wait for the server's reply, send the
    subscription request and then consume incoming messages. Data payloads are
    stored in ``self.resource_data``; progress and failures are mirrored into
    ``self.connection_states`` and ``self.last_error``.

    The outer loop ends without retrying when the attempt counter exceeds
    ``self.max_reconnect_attempts``, when the init reply is not valid JSON,
    when the server answers with ``connection_error``, or when the URI is
    rejected as invalid. Every other outcome (including a server-sent
    ``complete``) falls through to the backoff sleep at the bottom and
    reconnects.

    Args:
        subscription_name: Key used for all per-subscription state and as the
            operation ``id`` in the subscription message.
        query: GraphQL subscription document to send.
        variables: Variables sent alongside the query.
    """
    # NOTE(review): cancellation via task.cancel() presumably propagates out of this
    # coroutine because CancelledError is not an Exception subclass on Python 3.8+ — confirm.
    retry_delay = 5
    max_retry_delay = 300  # 5 minutes max
    while True:
        # The attempt counter lives on the manager so it is visible to status reporting.
        attempt = self.reconnect_attempts.get(subscription_name, 0) + 1
        self.reconnect_attempts[subscription_name] = attempt
        logger.info(f"[WEBSOCKET:{subscription_name}] Connection attempt #{attempt} (max: {self.max_reconnect_attempts})")
        if attempt > self.max_reconnect_attempts:
            logger.error(f"[WEBSOCKET:{subscription_name}] Max reconnection attempts ({self.max_reconnect_attempts}) exceeded, stopping")
            self.connection_states[subscription_name] = "max_retries_exceeded"
            break
        try:
            # Build WebSocket URL: swap the http(s) scheme for ws(s) and make sure
            # the path ends in /graphql.
            if UNRAID_API_URL.startswith('https://'):
                ws_url = 'wss://' + UNRAID_API_URL[len('https://'):]
            elif UNRAID_API_URL.startswith('http://'):
                ws_url = 'ws://' + UNRAID_API_URL[len('http://'):]
            else:
                ws_url = UNRAID_API_URL
            if not ws_url.endswith('/graphql'):
                ws_url = ws_url.rstrip('/') + '/graphql'
            logger.debug(f"[WEBSOCKET:{subscription_name}] Connecting to: {ws_url}")
            # Only the presence of the key is logged, never its value.
            logger.debug(f"[WEBSOCKET:{subscription_name}] API Key present: {'Yes' if UNRAID_API_KEY else 'No'}")
            # Connection with timeout
            # NOTE(review): connect_timeout is only logged; it is not passed to
            # websockets.connect() below, so the library default applies.
            connect_timeout = 10
            logger.debug(f"[WEBSOCKET:{subscription_name}] Connection timeout: {connect_timeout}s")
            async with websockets.connect(
                ws_url,
                subprotocols=["graphql-transport-ws", "graphql-ws"],
                ping_interval=20,
                ping_timeout=10,
                close_timeout=10
            ) as websocket:
                # The negotiated subprotocol decides the message type names used below.
                selected_proto = websocket.subprotocol or "none"
                logger.info(f"[WEBSOCKET:{subscription_name}] Connected! Protocol: {selected_proto}")
                self.connection_states[subscription_name] = "connected"
                # Reset retry count on successful connection
                self.reconnect_attempts[subscription_name] = 0
                retry_delay = 5  # Reset delay
                # Initialize GraphQL-WS protocol
                logger.debug(f"[PROTOCOL:{subscription_name}] Initializing GraphQL-WS protocol...")
                init_type = "connection_init"
                init_payload: Dict[str, Any] = {"type": init_type}
                if UNRAID_API_KEY:
                    logger.debug(f"[AUTH:{subscription_name}] Adding authentication payload")
                    # The key is sent under several spellings, both at the top level
                    # and under "headers" — presumably to cover different server
                    # expectations; verify which one the API actually reads.
                    auth_payload = {
                        "X-API-Key": UNRAID_API_KEY,
                        "x-api-key": UNRAID_API_KEY,
                        "authorization": f"Bearer {UNRAID_API_KEY}",
                        "Authorization": f"Bearer {UNRAID_API_KEY}",
                        "headers": {
                            "X-API-Key": UNRAID_API_KEY,
                            "x-api-key": UNRAID_API_KEY,
                            "Authorization": f"Bearer {UNRAID_API_KEY}"
                        }
                    }
                    init_payload["payload"] = auth_payload
                else:
                    logger.warning(f"[AUTH:{subscription_name}] No API key available for authentication")
                logger.debug(f"[PROTOCOL:{subscription_name}] Sending connection_init message")
                await websocket.send(json.dumps(init_payload))
                # Wait for connection acknowledgment (at most 30 seconds)
                logger.debug(f"[PROTOCOL:{subscription_name}] Waiting for connection_ack...")
                init_raw = await asyncio.wait_for(websocket.recv(), timeout=30)
                try:
                    init_data = json.loads(init_raw)
                    logger.debug(f"[PROTOCOL:{subscription_name}] Received init response: {init_data.get('type')}")
                except json.JSONDecodeError as e:
                    logger.error(f"[PROTOCOL:{subscription_name}] Failed to decode init response: {init_raw[:200]}...")
                    self.last_error[subscription_name] = f"Invalid JSON in init response: {e}"
                    # Leaves the outer while loop: no retry after a malformed init reply.
                    break
                # Handle connection acknowledgment
                if init_data.get("type") == "connection_ack":
                    logger.info(f"[PROTOCOL:{subscription_name}] Connection acknowledged successfully")
                    self.connection_states[subscription_name] = "authenticated"
                elif init_data.get("type") == "connection_error":
                    error_payload = init_data.get('payload', {})
                    logger.error(f"[AUTH:{subscription_name}] Authentication failed: {error_payload}")
                    self.last_error[subscription_name] = f"Authentication error: {error_payload}"
                    self.connection_states[subscription_name] = "auth_failed"
                    # Leaves the outer while loop: authentication failures are not retried.
                    break
                else:
                    logger.warning(f"[PROTOCOL:{subscription_name}] Unexpected init response: {init_data}")
                    # Continue anyway - some servers send other messages first
                # Start the subscription; the request type name depends on the subprotocol.
                logger.debug(f"[SUBSCRIPTION:{subscription_name}] Starting GraphQL subscription...")
                start_type = "subscribe" if selected_proto == "graphql-transport-ws" else "start"
                subscription_message = {
                    "id": subscription_name,
                    "type": start_type,
                    "payload": {
                        "query": query,
                        "variables": variables
                    }
                }
                logger.debug(f"[SUBSCRIPTION:{subscription_name}] Subscription message type: {start_type}")
                logger.debug(f"[SUBSCRIPTION:{subscription_name}] Query: {query[:100]}...")
                logger.debug(f"[SUBSCRIPTION:{subscription_name}] Variables: {variables}")
                await websocket.send(json.dumps(subscription_message))
                logger.info(f"[SUBSCRIPTION:{subscription_name}] Subscription started successfully")
                self.connection_states[subscription_name] = "subscribed"
                # Listen for subscription data
                message_count = 0
                # NOTE(review): last_data_time is written here and on each data update
                # but never read inside this method.
                last_data_time = datetime.now()
                async for message in websocket:
                    try:
                        data = json.loads(message)
                        message_count += 1
                        message_type = data.get('type', 'unknown')
                        logger.debug(f"[DATA:{subscription_name}] Message #{message_count}: {message_type}")
                        # Handle different message types
                        expected_data_type = "next" if selected_proto == "graphql-transport-ws" else "data"
                        if data.get("type") == expected_data_type and data.get("id") == subscription_name:
                            payload = data.get("payload", {})
                            if payload.get("data"):
                                logger.info(f"[DATA:{subscription_name}] Received subscription data update")
                                # Replace the cached snapshot for this subscription.
                                self.resource_data[subscription_name] = SubscriptionData(
                                    data=payload["data"],
                                    last_updated=datetime.now(),
                                    subscription_type=subscription_name
                                )
                                last_data_time = datetime.now()
                                logger.debug(f"[RESOURCE:{subscription_name}] Resource data updated successfully")
                            elif payload.get("errors"):
                                logger.error(f"[DATA:{subscription_name}] GraphQL errors in response: {payload['errors']}")
                                self.last_error[subscription_name] = f"GraphQL errors: {payload['errors']}"
                            else:
                                logger.warning(f"[DATA:{subscription_name}] Empty or invalid data payload: {payload}")
                        elif data.get("type") == "ping":
                            logger.debug(f"[PROTOCOL:{subscription_name}] Received ping, sending pong")
                            await websocket.send(json.dumps({"type": "pong"}))
                        elif data.get("type") == "error":
                            # The error is recorded but the message loop keeps running.
                            error_payload = data.get('payload', {})
                            logger.error(f"[SUBSCRIPTION:{subscription_name}] Subscription error: {error_payload}")
                            self.last_error[subscription_name] = f"Subscription error: {error_payload}"
                            self.connection_states[subscription_name] = "error"
                        elif data.get("type") == "complete":
                            logger.info(f"[SUBSCRIPTION:{subscription_name}] Subscription completed by server")
                            self.connection_states[subscription_name] = "completed"
                            # Leaves only the message loop; control then reaches the
                            # backoff section below and a new connection is attempted.
                            break
                        # "ping" can never reach this branch: it is handled above.
                        elif data.get("type") in ["ka", "ping", "pong"]:
                            logger.debug(f"[PROTOCOL:{subscription_name}] Keepalive message: {message_type}")
                        else:
                            logger.debug(f"[PROTOCOL:{subscription_name}] Unhandled message type: {message_type}")
                    except json.JSONDecodeError as e:
                        # A single undecodable message is logged and skipped.
                        logger.error(f"[PROTOCOL:{subscription_name}] Failed to decode message: {message[:200]}...")
                        logger.error(f"[PROTOCOL:{subscription_name}] JSON decode error: {e}")
                    except Exception as e:
                        logger.error(f"[DATA:{subscription_name}] Error processing message: {e}")
                        logger.debug(f"[DATA:{subscription_name}] Raw message: {message[:200]}...")
        except asyncio.TimeoutError:
            error_msg = "Connection or authentication timeout"
            logger.error(f"[WEBSOCKET:{subscription_name}] {error_msg}")
            self.last_error[subscription_name] = error_msg
            self.connection_states[subscription_name] = "timeout"
        except websockets.exceptions.ConnectionClosed as e:
            error_msg = f"WebSocket connection closed: {e}"
            logger.warning(f"[WEBSOCKET:{subscription_name}] {error_msg}")
            self.last_error[subscription_name] = error_msg
            self.connection_states[subscription_name] = "disconnected"
        except websockets.exceptions.InvalidURI as e:
            error_msg = f"Invalid WebSocket URI: {e}"
            logger.error(f"[WEBSOCKET:{subscription_name}] {error_msg}")
            self.last_error[subscription_name] = error_msg
            self.connection_states[subscription_name] = "invalid_uri"
            break  # Don't retry on invalid URI
        except Exception as e:
            error_msg = f"Unexpected error: {e}"
            logger.error(f"[WEBSOCKET:{subscription_name}] {error_msg}")
            self.last_error[subscription_name] = error_msg
            self.connection_states[subscription_name] = "error"
        # Calculate backoff delay: grow by 1.5x per pass, capped at max_retry_delay.
        # The state written by the handlers above is overwritten with "reconnecting".
        retry_delay = min(retry_delay * 1.5, max_retry_delay)
        logger.info(f"[WEBSOCKET:{subscription_name}] Reconnecting in {retry_delay:.1f} seconds...")
        self.connection_states[subscription_name] = "reconnecting"
        await asyncio.sleep(retry_delay)
def get_resource_data(self, resource_name: str) -> Optional[Dict[str, Any]]:
    """Return the most recent payload cached for a subscription, if any.

    Args:
        resource_name: Key under which the subscription stores its data.

    Returns:
        The cached ``data`` attribute of the stored entry, or None when nothing
        has been stored for ``resource_name``.
    """
    logger.debug(f"[RESOURCE:{resource_name}] Resource data requested")

    # Guard clause: nothing cached for this name.
    if resource_name not in self.resource_data:
        logger.debug(f"[RESOURCE:{resource_name}] No data available")
        return None

    entry = self.resource_data[resource_name]
    # The age is computed for the debug log only.
    age_seconds = (datetime.now() - entry.last_updated).total_seconds()
    logger.debug(f"[RESOURCE:{resource_name}] Data found, age: {age_seconds:.1f}s")
    return entry.data
def list_active_subscriptions(self) -> List[str]:
    """Return the names of all subscriptions that currently have a task registered."""
    # Iterating the mapping yields its keys.
    active = list(self.active_subscriptions)
    logger.debug(f"[SUBSCRIPTION_MANAGER] Active subscriptions: {active}")
    return active
def get_subscription_status(self) -> Dict[str, Dict[str, Any]]:
    """Build a diagnostic report covering every configured subscription.

    Returns:
        A dict keyed by subscription name. Each value has a ``config`` section
        (resource, description, auto_start), a ``runtime`` section (active flag,
        connection state, reconnect attempts, last error) and a ``data`` section
        describing whether cached data exists and how old it is.
    """
    status = {}
    for sub_name, config in self.subscription_configs.items():
        config_section = {
            "resource": config["resource"],
            "description": config["description"],
            "auto_start": config.get("auto_start", False)
        }
        runtime_section = {
            "active": sub_name in self.active_subscriptions,
            "connection_state": self.connection_states.get(sub_name, "not_started"),
            "reconnect_attempts": self.reconnect_attempts.get(sub_name, 0),
            "last_error": self.last_error.get(sub_name)
        }

        # Describe the cached data, if any has arrived for this subscription.
        if sub_name not in self.resource_data:
            data_section = {"available": False}
        else:
            cached = self.resource_data[sub_name]
            data_section = {
                "available": True,
                "last_updated": cached.last_updated.isoformat(),
                "age_seconds": (datetime.now() - cached.last_updated).total_seconds()
            }

        status[sub_name] = {
            "config": config_section,
            "runtime": runtime_section,
            "data": data_section
        }

    logger.debug(f"[SUBSCRIPTION_MANAGER] Generated status for {len(status)} subscriptions")
    return status
# Global subscription manager instance, created once when this module is imported
# and shared by every module that imports `subscription_manager`.
subscription_manager = SubscriptionManager()

View File

@@ -0,0 +1,91 @@
"""MCP resources that expose subscription data.
This module defines MCP resources that bridge between the subscription manager
and the MCP protocol, returning a placeholder status message when subscription data is not yet available.
"""
import json
import os
from pathlib import Path
from fastmcp import FastMCP
from ..config.logging import logger
from .manager import subscription_manager
# Global flag to track subscription startup: flipped to True by
# ensure_subscriptions_started() once the auto-start call has returned without raising,
# so later calls return immediately.
_subscriptions_started = False
async def ensure_subscriptions_started():
    """Start the auto-start subscriptions the first time this is awaited.

    Later calls are no-ops once the module-level flag has been set. A failure
    during startup is logged and swallowed, leaving the flag unset so the next
    call tries again.
    """
    global _subscriptions_started

    if not _subscriptions_started:
        logger.info("[STARTUP] First async operation detected, starting subscriptions...")
        try:
            await autostart_subscriptions()
            # Only mark as started after the auto-start call returned normally.
            _subscriptions_started = True
            logger.info("[STARTUP] Subscriptions started successfully")
        except Exception as e:
            logger.error(f"[STARTUP] Failed to start subscriptions: {e}", exc_info=True)
async def autostart_subscriptions():
    """Start every auto-start subscription, then the optional log-file subscription.

    The log path comes from the UNRAID_AUTOSTART_LOG_PATH environment variable;
    when that variable is not set at all, /var/log/syslog is used if it exists.
    Errors in either phase are logged and do not propagate.
    """
    logger.info("[AUTOSTART] Initiating subscription auto-start process...")
    try:
        # Delegate to the SubscriptionManager's own auto-start routine.
        await subscription_manager.auto_start_all_subscriptions()
        logger.info("[AUTOSTART] Auto-start process completed successfully")
    except Exception as e:
        logger.error(f"[AUTOSTART] Failed during auto-start process: {e}", exc_info=True)

    # Optional log file subscription
    log_path = os.environ.get("UNRAID_AUTOSTART_LOG_PATH")
    default_path = "/var/log/syslog"
    # The default is considered only when the variable is absent (an empty value disables it).
    if log_path is None and Path(default_path).exists():
        log_path = default_path
        logger.info(f"[AUTOSTART] Using default log path: {default_path}")

    if not log_path:
        logger.info("[AUTOSTART] No log file path configured for auto-start")
        return

    try:
        logger.info(f"[AUTOSTART] Starting log file subscription for: {log_path}")
        config = subscription_manager.subscription_configs.get("logFileSubscription")
        if not config:
            logger.error("[AUTOSTART] logFileSubscription config not found")
        else:
            await subscription_manager.start_subscription("logFileSubscription", config["query"], {"path": log_path})
            logger.info(f"[AUTOSTART] Log file subscription started for: {log_path}")
    except Exception as e:
        logger.error(f"[AUTOSTART] Failed to start log file subscription: {e}", exc_info=True)
def register_subscription_resources(mcp: FastMCP):
    """Attach the subscription-backed resources to a FastMCP server.

    Currently a single resource is registered: ``unraid://logs/stream``.

    Args:
        mcp: FastMCP instance to register resources with
    """

    @mcp.resource("unraid://logs/stream")
    async def logs_stream_resource() -> str:
        """Real-time log stream data from subscription."""
        await ensure_subscriptions_started()
        snapshot = subscription_manager.get_resource_data("logFileSubscription")
        # No cached data yet: answer with a placeholder instead of the payload.
        if not snapshot:
            return json.dumps({
                "status": "No subscription data yet",
                "message": "Subscriptions auto-start on server boot. If this persists, check server logs for WebSocket/auth issues."
            })
        return json.dumps(snapshot, indent=2)

    logger.info("Subscription resources registered successfully")

View File

@@ -0,0 +1 @@
"""MCP tools organized by functional domain."""

387
unraid_mcp/tools/docker.py Normal file
View File

@@ -0,0 +1,387 @@
"""Docker container management tools.
This module provides tools for Docker container lifecycle and management
including listing containers with caching options, start/stop operations,
and detailed container information retrieval.
"""
from typing import Any
from fastmcp import FastMCP
from ..config.logging import logger
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
def find_container_by_identifier(container_identifier: str, containers: list[dict[str, Any]]) -> dict[str, Any] | None:
"""Find a container by ID or name with fuzzy matching.
Args:
container_identifier: Container ID or name to find
containers: List of container dictionaries to search
Returns:
Container dictionary if found, None otherwise
"""
if not containers:
return None
# Exact matches first
for container in containers:
if container.get("id") == container_identifier:
return container
# Check all names for exact match
names = container.get("names", [])
if container_identifier in names:
return container
# Fuzzy matching - case insensitive partial matches
container_identifier_lower = container_identifier.lower()
for container in containers:
names = container.get("names", [])
for name in names:
if container_identifier_lower in name.lower() or name.lower() in container_identifier_lower:
logger.info(f"Found container via fuzzy match: '{container_identifier}' -> '{name}'")
return container
return None
def get_available_container_names(containers: list[dict[str, Any]]) -> list[str]:
    """Collect every name of every container, in list order, for error reporting.

    Args:
        containers: List of container dictionaries

    Returns:
        Flat list of container names; containers without a "names" key contribute nothing
    """
    return [
        name
        for container in containers
        for name in container.get("names", [])
    ]
def register_docker_tools(mcp: FastMCP):
    """Register all Docker tools with the FastMCP instance.

    Three tools are registered: ``list_docker_containers``,
    ``manage_docker_container`` and ``get_docker_container_details``.

    Args:
        mcp: FastMCP instance to register tools with
    """

    @mcp.tool()
    async def list_docker_containers() -> list[dict[str, Any]]:
        """Lists all Docker containers on the Unraid system.

        Returns:
            List of Docker container information dictionaries
        """
        query = """
query ListDockerContainers {
docker {
containers(skipCache: false) {
id
names
image
state
status
autoStart
}
}
}
"""
        try:
            logger.info("Executing list_docker_containers tool")
            response_data = await make_graphql_request(query)
            if response_data.get("docker"):
                return response_data["docker"].get("containers", [])
            return []
        except Exception as e:
            logger.error(f"Error in list_docker_containers: {e}", exc_info=True)
            raise ToolError(f"Failed to list Docker containers: {str(e)}")

    @mcp.tool()
    async def manage_docker_container(container_id: str, action: str) -> dict[str, Any]:
        """Starts or stops a specific Docker container. Action must be 'start' or 'stop'.

        Args:
            container_id: Container ID to manage
            action: Action to perform - 'start' or 'stop'

        Returns:
            Dict containing operation result and container information
        """
        import asyncio

        if action.lower() not in ["start", "stop"]:
            logger.warning(f"Invalid action '{action}' for manage_docker_container")
            raise ToolError("Invalid action. Must be 'start' or 'stop'.")
        mutation_name = action.lower()

        # The mutation field name is the validated action itself ("start" or "stop").
        operation_query = f"""
mutation ManageDockerContainer($id: PrefixedID!) {{
docker {{
{mutation_name}(id: $id) {{
id
names
state
status
}}
}}
}}
"""
        try:
            logger.info(f"Executing manage_docker_container: action={action}, id={container_id}")

            # Step 1: Resolve container identifier to actual container ID if needed
            actual_container_id = container_id
            # NOTE(review): this prefix is one specific server's ID prefix. Identifiers
            # from any other server take the resolution path below, which still finds
            # them by exact ID match at the cost of one extra query.
            if not container_id.startswith("3cb1026338736ed07b8afec2c484e429710b0f6550dc65d0c5c410ea9d0fa6b2:"):
                # This looks like a name, not a full container ID - need to resolve it
                logger.info(f"Resolving container identifier '{container_id}' to actual container ID")
                list_query = """
query ResolveContainerID {
docker {
containers(skipCache: true) {
id
names
}
}
}
"""
                list_response = await make_graphql_request(list_query)
                if list_response.get("docker"):
                    containers = list_response["docker"].get("containers", [])
                    resolved_container = find_container_by_identifier(container_id, containers)
                    if resolved_container:
                        actual_container_id = resolved_container.get("id")
                        logger.info(f"Resolved '{container_id}' to container ID: {actual_container_id}")
                    else:
                        available_names = get_available_container_names(containers)
                        error_msg = f"Container '{container_id}' not found for {action} operation."
                        if available_names:
                            error_msg += f" Available containers: {', '.join(available_names[:10])}"
                        raise ToolError(error_msg)

            variables = {"id": actual_container_id}

            # Later state lookups use the resolved ID so they hit the exact-match path.
            # Re-running the fuzzy match on the caller's identifier against a fresh
            # list could select a different container than the one operated on.
            lookup_identifier = actual_container_id or container_id

            # Execute the operation with idempotent error handling
            operation_context = {"operation": action}
            operation_response = await make_graphql_request(
                operation_query,
                variables,
                operation_context=operation_context
            )

            # Handle idempotent success case
            if operation_response.get("idempotent_success"):
                logger.info(f"Container {action} operation was idempotent: {operation_response.get('message')}")
                # Get current container state since the operation was already complete
                try:
                    list_query = """
query GetContainerStateAfterIdempotent($skipCache: Boolean!) {
docker {
containers(skipCache: $skipCache) {
id
names
image
state
status
autoStart
}
}
}
"""
                    list_response = await make_graphql_request(list_query, {"skipCache": True})
                    if list_response.get("docker"):
                        containers = list_response["docker"].get("containers", [])
                        container = find_container_by_identifier(lookup_identifier, containers)
                        if container:
                            return {
                                "operation_result": {"id": container_id, "names": container.get("names", [])},
                                "container_details": container,
                                "success": True,
                                "message": f"Container {action} operation was already complete - current state returned",
                                "idempotent": True
                            }
                except Exception as lookup_error:
                    # Best effort only: the operation itself already counts as successful.
                    logger.warning(f"Could not retrieve container state after idempotent operation: {lookup_error}")
                return {
                    "operation_result": {"id": container_id},
                    "container_details": None,
                    "success": True,
                    "message": f"Container {action} operation was already complete",
                    "idempotent": True
                }

            # Handle normal successful operation
            if not (operation_response.get("docker") and operation_response["docker"].get(mutation_name)):
                raise ToolError(f"Failed to execute {action} operation on container")
            operation_result = operation_response["docker"][mutation_name]
            logger.info(f"Container {action} operation completed for {container_id}")

            # Step 2: Wait briefly for state to propagate, then fetch current container details
            await asyncio.sleep(1.0)  # Give the container state time to update

            # Step 3: Try to get updated container details with retry logic
            max_retries = 3
            retry_delay = 1.0
            # The query does not change between attempts, so it is built once.
            state_query = """
query GetUpdatedContainerState($skipCache: Boolean!) {
docker {
containers(skipCache: $skipCache) {
id
names
image
state
status
autoStart
}
}
}
"""
            for attempt in range(max_retries):
                try:
                    # Query all containers (skipping the cache) and find the one we just operated on
                    list_response = await make_graphql_request(state_query, {"skipCache": True})
                    if list_response.get("docker"):
                        containers = list_response["docker"].get("containers", [])
                        container = find_container_by_identifier(lookup_identifier, containers)
                        if container:
                            logger.info(f"Found updated container state for {container_id}")
                            return {
                                "operation_result": operation_result,
                                "container_details": container,
                                "success": True,
                                "message": f"Container {action} operation completed successfully"
                            }
                    # If not found in this attempt, wait and retry
                    if attempt < max_retries - 1:
                        logger.warning(f"Container {container_id} not found after {action}, retrying in {retry_delay}s (attempt {attempt + 1}/{max_retries})")
                        await asyncio.sleep(retry_delay)
                        retry_delay *= 1.5  # Exponential backoff
                except Exception as query_error:
                    logger.warning(f"Error querying updated container state (attempt {attempt + 1}): {query_error}")
                    if attempt < max_retries - 1:
                        await asyncio.sleep(retry_delay)
                        retry_delay *= 1.5
                    else:
                        # On final attempt failure, still return operation success
                        logger.warning(f"Could not retrieve updated container details after {action}, but operation succeeded")
                        return {
                            "operation_result": operation_result,
                            "container_details": None,
                            "success": True,
                            "message": f"Container {action} operation completed, but updated state could not be retrieved",
                            "warning": "Container state query failed after operation - this may be due to timing or the container not being found in the updated state"
                        }

            # If we get here, all retries failed to find the container
            logger.warning(f"Container {container_id} not found in any retry attempt after {action}")
            return {
                "operation_result": operation_result,
                "container_details": None,
                "success": True,
                "message": f"Container {action} operation completed, but container not found in subsequent queries",
                "warning": "Container not found in updated state - this may indicate the operation succeeded but container is no longer listed"
            }
        except Exception as e:
            # ToolErrors raised above are also caught here and re-wrapped with the action name.
            logger.error(f"Error in manage_docker_container ({action}): {e}", exc_info=True)
            raise ToolError(f"Failed to {action} Docker container: {str(e)}")

    @mcp.tool()
    async def get_docker_container_details(container_identifier: str) -> dict[str, Any]:
        """Retrieves detailed information for a specific Docker container by its ID or name.

        Args:
            container_identifier: Container ID or name to retrieve details for

        Returns:
            Dict containing detailed container information
        """
        # This tool fetches all containers and then filters by ID or name.
        # Field selection shared by every container in the listing:
        detailed_query_fields = """
id
names
image
imageId
command
created
ports { ip privatePort publicPort type }
sizeRootFs
labels # JSONObject
state
status
hostConfig { networkMode }
networkSettings # JSONObject
mounts # JSONObject array
autoStart
"""
        # Fetch all containers first
        list_query = f"""
query GetAllContainerDetailsForFiltering {{
docker {{
containers(skipCache: false) {{
{detailed_query_fields}
}}
}}
}}
"""
        try:
            logger.info(f"Executing get_docker_container_details for identifier: {container_identifier}")
            response_data = await make_graphql_request(list_query)
            containers = []
            if response_data.get("docker"):
                containers = response_data["docker"].get("containers", [])

            # Use our enhanced container lookup
            container = find_container_by_identifier(container_identifier, containers)
            if container:
                logger.info(f"Found container {container_identifier}")
                return container

            # Container not found - provide helpful error message with available containers
            available_names = get_available_container_names(containers)
            logger.warning(f"Container with identifier '{container_identifier}' not found.")
            logger.info(f"Available containers: {available_names}")
            error_msg = f"Container '{container_identifier}' not found."
            if available_names:
                error_msg += f" Available containers: {', '.join(available_names[:10])}"  # Limit to first 10
                if len(available_names) > 10:
                    error_msg += f" (and {len(available_names) - 10} more)"
            else:
                error_msg += " No containers are currently available."
            raise ToolError(error_msg)
        except Exception as e:
            logger.error(f"Error in get_docker_container_details: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve Docker container details: {str(e)}")

    logger.info("Docker tools registered successfully")

187
unraid_mcp/tools/health.py Normal file
View File

@@ -0,0 +1,187 @@
"""Comprehensive health monitoring tools.
This module provides tools for comprehensive health checks of the Unraid MCP server
and the underlying Unraid system, including performance metrics, system status,
notifications, Docker services, and API responsiveness.
"""
import datetime
import time
from typing import Any, Dict
from fastmcp import FastMCP
from ..config.logging import logger
from ..config.settings import UNRAID_API_URL, UNRAID_MCP_HOST, UNRAID_MCP_PORT, UNRAID_MCP_TRANSPORT
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
# Severity order of the statuses reported by health_check. "degraded" ranks above
# "warning", matching the latency thresholds (>10s degraded, >5s warning).
_HEALTH_SEVERITY = {"healthy": 0, "warning": 1, "degraded": 2, "unhealthy": 3}


def _escalate_status(current: str, candidate: str) -> str:
    """Return the more severe of two health statuses so a finding never downgrades the result."""
    if _HEALTH_SEVERITY.get(candidate, 0) > _HEALTH_SEVERITY.get(current, 0):
        return candidate
    return current


def register_health_tools(mcp: FastMCP):
    """Register all health tools with the FastMCP instance.

    Args:
        mcp: FastMCP instance to register tools with
    """

    @mcp.tool()
    async def health_check() -> Dict[str, Any]:
        """Returns comprehensive health status of the Unraid MCP server and system for monitoring purposes."""
        start_time = time.time()
        health_status = "healthy"
        issues = []
        try:
            # Enhanced health check with multiple system components
            comprehensive_query = """
query ComprehensiveHealthCheck {
info {
machineId
time
versions { unraid }
os { uptime }
}
array {
state
}
notifications {
overview {
unread { alert warning total }
}
}
docker {
containers(skipCache: true) {
id
state
status
}
}
}
"""
            response_data = await make_graphql_request(comprehensive_query)
            api_latency = round((time.time() - start_time) * 1000, 2)  # ms

            # Base health info
            health_info = {
                "status": health_status,
                "timestamp": datetime.datetime.utcnow().isoformat(),
                "api_latency_ms": api_latency,
                "server": {
                    "name": "Unraid MCP Server",
                    "version": "0.1.0",
                    "transport": UNRAID_MCP_TRANSPORT,
                    "host": UNRAID_MCP_HOST,
                    "port": UNRAID_MCP_PORT,
                    # Rough estimate: this is the time elapsed since the check started,
                    # not the lifetime of the server process.
                    "process_uptime_seconds": time.time() - start_time
                }
            }

            if not response_data:
                issues.append("No response from Unraid API")
                health_info["status"] = "unhealthy"
                health_info["issues"] = issues
                return health_info

            # System info analysis ("or {}" tolerates fields the API returns as null)
            info = response_data.get("info") or {}
            if info:
                health_info["unraid_system"] = {
                    "status": "connected",
                    "url": UNRAID_API_URL,
                    "machine_id": info.get("machineId"),
                    "time": info.get("time"),
                    "version": (info.get("versions") or {}).get("unraid"),
                    "uptime": (info.get("os") or {}).get("uptime")
                }
            else:
                health_status = _escalate_status(health_status, "degraded")
                issues.append("Unable to retrieve system info")

            # Array health analysis
            array_info = response_data.get("array") or {}
            if array_info:
                array_state = array_info.get("state", "unknown")
                array_healthy = array_state in ["STARTED", "STOPPED"]
                health_info["array_status"] = {
                    "state": array_state,
                    "healthy": array_healthy
                }
                if not array_healthy:
                    health_status = _escalate_status(health_status, "warning")
                    issues.append(f"Array in unexpected state: {array_state}")
            else:
                health_status = _escalate_status(health_status, "warning")
                issues.append("Unable to retrieve array status")

            # Notifications analysis
            notifications = response_data.get("notifications") or {}
            if notifications.get("overview"):
                unread = notifications["overview"].get("unread") or {}
                alert_count = unread.get("alert") or 0
                warning_count = unread.get("warning") or 0
                total_unread = unread.get("total") or 0
                health_info["notifications"] = {
                    "unread_total": total_unread,
                    "unread_alerts": alert_count,
                    "unread_warnings": warning_count,
                    "has_critical_notifications": alert_count > 0
                }
                if alert_count > 0:
                    health_status = _escalate_status(health_status, "warning")
                    issues.append(f"{alert_count} unread alert notification(s)")

            # Docker services analysis
            docker_info = response_data.get("docker") or {}
            if docker_info.get("containers"):
                containers = docker_info["containers"]
                running_containers = [c for c in containers if c.get("state") == "running"]
                stopped_containers = [c for c in containers if c.get("state") == "exited"]
                health_info["docker_services"] = {
                    "total_containers": len(containers),
                    "running_containers": len(running_containers),
                    "stopped_containers": len(stopped_containers),
                    "containers_healthy": len([c for c in containers if (c.get("status") or "").startswith("Up")])
                }

            # API performance assessment: test the higher threshold first, otherwise
            # the ">10 seconds" case is shadowed by the ">5 seconds" one.
            if api_latency > 10000:  # > 10 seconds
                health_status = _escalate_status(health_status, "degraded")
                issues.append(f"Very high API latency: {api_latency}ms")
            elif api_latency > 5000:  # > 5 seconds
                health_status = _escalate_status(health_status, "warning")
                issues.append(f"High API latency: {api_latency}ms")

            # Final status determination
            health_info["status"] = health_status
            if issues:
                health_info["issues"] = issues

            # Add performance metrics
            health_info["performance"] = {
                "api_response_time_ms": api_latency,
                "health_check_duration_ms": round((time.time() - start_time) * 1000, 2)
            }
            return health_info
        except Exception as e:
            # Any failure (including the GraphQL request itself) yields an "unhealthy"
            # report rather than an exception, so monitoring always gets an answer.
            logger.error(f"Health check failed: {e}")
            return {
                "status": "unhealthy",
                "timestamp": datetime.datetime.utcnow().isoformat(),
                "error": str(e),
                "api_latency_ms": round((time.time() - start_time) * 1000, 2),
                "server": {
                    "name": "Unraid MCP Server",
                    "version": "0.1.0",
                    "transport": UNRAID_MCP_TRANSPORT,
                    "host": UNRAID_MCP_HOST,
                    "port": UNRAID_MCP_PORT
                }
            }

    logger.info("Health tools registered successfully")

178
unraid_mcp/tools/rclone.py Normal file
View File

@@ -0,0 +1,178 @@
"""RClone cloud storage remote management tools.
This module provides tools for managing RClone remotes including listing existing
remotes, getting configuration forms, creating new remotes, and deleting remotes
for various cloud storage providers (S3, Google Drive, Dropbox, FTP, etc.).
"""
from typing import Any, Dict, List, Optional
from fastmcp import FastMCP
from ..config.logging import logger
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
def register_rclone_tools(mcp: FastMCP):
"""Register all RClone tools with the FastMCP instance.
Args:
mcp: FastMCP instance to register tools with
"""
@mcp.tool()
async def list_rclone_remotes() -> List[Dict[str, Any]]:
"""Retrieves all configured RClone remotes with their configuration details."""
try:
query = """
query ListRCloneRemotes {
rclone {
remotes {
name
type
parameters
config
}
}
}
"""
response_data = await make_graphql_request(query)
if "rclone" in response_data and "remotes" in response_data["rclone"]:
remotes = response_data["rclone"]["remotes"]
logger.info(f"Retrieved {len(remotes)} RClone remotes")
return remotes
return []
except Exception as e:
logger.error(f"Failed to list RClone remotes: {str(e)}")
raise ToolError(f"Failed to list RClone remotes: {str(e)}")
@mcp.tool()
async def get_rclone_config_form(provider_type: Optional[str] = None) -> Dict[str, Any]:
"""
Get RClone configuration form schema for setting up new remotes.
Args:
provider_type: Optional provider type to get specific form (e.g., 's3', 'drive', 'dropbox')
"""
try:
query = """
query GetRCloneConfigForm($formOptions: RCloneConfigFormInput) {
rclone {
configForm(formOptions: $formOptions) {
id
dataSchema
uiSchema
}
}
}
"""
variables = {}
if provider_type:
variables["formOptions"] = {"providerType": provider_type}
response_data = await make_graphql_request(query, variables)
if "rclone" in response_data and "configForm" in response_data["rclone"]:
form_data = response_data["rclone"]["configForm"]
logger.info(f"Retrieved RClone config form for {provider_type or 'general'}")
return form_data
raise ToolError("No RClone config form data received")
except Exception as e:
logger.error(f"Failed to get RClone config form: {str(e)}")
raise ToolError(f"Failed to get RClone config form: {str(e)}")
@mcp.tool()
async def create_rclone_remote(name: str, provider_type: str, config_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Create a new RClone remote with the specified configuration.
Args:
name: Name for the new remote
provider_type: Type of provider (e.g., 's3', 'drive', 'dropbox', 'ftp')
config_data: Configuration parameters specific to the provider type
"""
try:
mutation = """
mutation CreateRCloneRemote($input: CreateRCloneRemoteInput!) {
rclone {
createRCloneRemote(input: $input) {
name
type
parameters
}
}
}
"""
variables = {
"input": {
"name": name,
"type": provider_type,
"config": config_data
}
}
response_data = await make_graphql_request(mutation, variables)
if "rclone" in response_data and "createRCloneRemote" in response_data["rclone"]:
remote_info = response_data["rclone"]["createRCloneRemote"]
logger.info(f"Successfully created RClone remote: {name}")
return {
"success": True,
"message": f"RClone remote '{name}' created successfully",
"remote": remote_info
}
raise ToolError("Failed to create RClone remote")
except Exception as e:
logger.error(f"Failed to create RClone remote {name}: {str(e)}")
raise ToolError(f"Failed to create RClone remote {name}: {str(e)}")
@mcp.tool()
async def delete_rclone_remote(name: str) -> Dict[str, Any]:
    """
    Delete an existing RClone remote by name.

    Args:
        name: Name of the remote to delete

    Returns:
        Dict with "success" (True) and a human-readable "message".

    Raises:
        ToolError: If the request fails or the API does not confirm the deletion.
    """
    mutation = """
    mutation DeleteRCloneRemote($input: DeleteRCloneRemoteInput!) {
        rclone {
            deleteRCloneRemote(input: $input)
        }
    }
    """
    variables = {"input": {"name": name}}
    try:
        response_data = await make_graphql_request(mutation, variables)

        # The response may carry `"rclone": null` or omit `deleteRCloneRemote`;
        # treat both as "deletion not confirmed" instead of letting a
        # KeyError/TypeError produce a cryptic error message.
        rclone_data = (response_data or {}).get("rclone") or {}
        if not rclone_data.get("deleteRCloneRemote"):
            raise ToolError(f"Failed to delete RClone remote '{name}'")

        logger.info(f"Successfully deleted RClone remote: {name}")
        return {
            "success": True,
            "message": f"RClone remote '{name}' deleted successfully",
        }
    except Exception as e:
        logger.error(f"Failed to delete RClone remote {name}: {str(e)}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ToolError(f"Failed to delete RClone remote {name}: {str(e)}") from e
logger.info("RClone tools registered successfully")

270
unraid_mcp/tools/storage.py Normal file
View File

@@ -0,0 +1,270 @@
"""Storage, disk, and notification management tools.
This module provides tools for managing user shares, notifications,
log files, physical disks with SMART data, and system storage operations
with custom timeout configurations for disk-intensive operations.
"""
from typing import Any, Dict, List, Optional
import httpx
from fastmcp import FastMCP
from ..config.logging import logger
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
def register_storage_tools(mcp: FastMCP):
    """Register all storage tools with the FastMCP instance.

    Each tool sends one GraphQL query through ``make_graphql_request``; any
    failure is logged and re-raised as ``ToolError`` with the original
    exception chained.

    Args:
        mcp: FastMCP instance to register tools with
    """

    def _format_bytes(bytes_value):
        """Render a byte count as a string using 1024-based units ("N/A" for None)."""
        if bytes_value is None:
            return "N/A"
        bytes_value = int(bytes_value)
        for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
            if bytes_value < 1024.0:
                return f"{bytes_value:.2f} {unit}"
            bytes_value /= 1024.0
        return f"{bytes_value:.2f} EB"

    @mcp.tool()
    async def get_shares_info() -> List[Dict[str, Any]]:
        """Retrieves information about user shares."""
        query = """
        query GetSharesInfo {
            shares {
                id
                name
                free
                used
                size
                include
                exclude
                cache
                nameOrig
                comment
                allocator
                splitLevel
                floor
                cow
                color
                luksStatus
            }
        }
        """
        try:
            logger.info("Executing get_shares_info tool")
            response_data = await make_graphql_request(query)
            # `or []` also covers an explicit `"shares": null`, which
            # `.get("shares", [])` would hand back as None.
            return response_data.get("shares") or []
        except Exception as e:
            logger.error(f"Error in get_shares_info: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve shares information: {str(e)}") from e

    @mcp.tool()
    async def get_notifications_overview() -> Dict[str, Any]:
        """Retrieves an overview of system notifications (unread and archive counts by severity)."""
        query = """
        query GetNotificationsOverview {
            notifications {
                overview {
                    unread { info warning alert total }
                    archive { info warning alert total }
                }
            }
        }
        """
        try:
            logger.info("Executing get_notifications_overview tool")
            response_data = await make_graphql_request(query)
            notifications = response_data.get("notifications") or {}
            return notifications.get("overview") or {}
        except Exception as e:
            logger.error(f"Error in get_notifications_overview: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve notifications overview: {str(e)}") from e

    @mcp.tool()
    async def list_notifications(
        type: str,
        offset: int,
        limit: int,
        importance: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Lists notifications with filtering. Type: UNREAD/ARCHIVE. Importance: INFO/WARNING/ALERT."""
        # NOTE: `type` shadows the builtin, but it is part of the tool's public
        # parameter list and therefore kept.
        query = """
        query ListNotifications($filter: NotificationFilter!) {
            notifications {
                list(filter: $filter) {
                    id
                    title
                    subject
                    description
                    importance
                    link
                    type
                    timestamp
                    formattedTimestamp
                }
            }
        }
        """
        notification_filter = {
            "type": type.upper(),
            "offset": offset,
            "limit": limit,
        }
        # Only send `importance` when provided; a null value is omitted because
        # the GraphQL side might be strict about it.
        if importance:
            notification_filter["importance"] = importance.upper()
        variables = {"filter": notification_filter}
        try:
            logger.info(f"Executing list_notifications: type={type}, offset={offset}, limit={limit}, importance={importance}")
            response_data = await make_graphql_request(query, variables)
            notifications = response_data.get("notifications") or {}
            return notifications.get("list") or []
        except Exception as e:
            logger.error(f"Error in list_notifications: {e}", exc_info=True)
            raise ToolError(f"Failed to list notifications: {str(e)}") from e

    @mcp.tool()
    async def list_available_log_files() -> List[Dict[str, Any]]:
        """Lists all available log files that can be queried."""
        query = """
        query ListLogFiles {
            logFiles {
                name
                path
                size
                modifiedAt
            }
        }
        """
        try:
            logger.info("Executing list_available_log_files tool")
            response_data = await make_graphql_request(query)
            return response_data.get("logFiles") or []
        except Exception as e:
            logger.error(f"Error in list_available_log_files: {e}", exc_info=True)
            raise ToolError(f"Failed to list available log files: {str(e)}") from e

    @mcp.tool()
    async def get_logs(log_file_path: str, tail_lines: int = 100) -> Dict[str, Any]:
        """Retrieves content from a specific log file, defaulting to the last 100 lines."""
        # `tail_lines` is passed straight through as the query's `lines`
        # argument, with no `startLine`.
        # NOTE(review): this assumes the API treats `lines` without `startLine`
        # as a tail. If it does not, a robust tail needs `totalLines` first
        # (the response includes it, along with `startLine`) and a second
        # request with a computed start line.
        query = """
        query GetLogContent($path: String!, $lines: Int) {
            logFile(path: $path, lines: $lines) {
                path
                content
                totalLines
                startLine
            }
        }
        """
        variables = {"path": log_file_path, "lines": tail_lines}
        try:
            logger.info(f"Executing get_logs for {log_file_path}, tail_lines={tail_lines}")
            response_data = await make_graphql_request(query, variables)
            return response_data.get("logFile") or {}
        except Exception as e:
            logger.error(f"Error in get_logs for {log_file_path}: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve logs from {log_file_path}: {str(e)}") from e

    @mcp.tool()
    async def list_physical_disks() -> List[Dict[str, Any]]:
        """Lists all physical disks recognized by the Unraid system."""
        # Querying an extremely minimal set of fields for diagnostics
        query = """
        query ListPhysicalDisksMinimal {
            disks {
                id
                device
                name
            }
        }
        """
        try:
            logger.info("Executing list_physical_disks tool with minimal query and increased timeout")
            # Increased read timeout for this potentially slow query
            long_timeout = httpx.Timeout(10.0, read=90.0, connect=5.0)
            response_data = await make_graphql_request(query, custom_timeout=long_timeout)
            return response_data.get("disks") or []
        except Exception as e:
            logger.error(f"Error in list_physical_disks: {e}", exc_info=True)
            raise ToolError(f"Failed to list physical disks: {str(e)}") from e

    @mcp.tool()
    async def get_disk_details(disk_id: str) -> Dict[str, Any]:
        """Retrieves details for a specific physical disk (id, device, name, serial number, size, temperature).

        Returns a dict with a formatted 'summary', a 'partitions' list and the
        raw 'details' object returned by the API.
        """
        query = """
        query GetDiskDetails($id: PrefixedID!) {
            disk(id: $id) {
                id
                device
                name
                serialNum
                size
                temperature
            }
        }
        """
        variables = {"id": disk_id}
        try:
            logger.info(f"Executing get_disk_details for disk: {disk_id}")
            response_data = await make_graphql_request(query, variables)
            raw_disk = response_data.get("disk") or {}
            if not raw_disk:
                raise ToolError(f"Disk '{disk_id}' not found")

            temperature = raw_disk.get('temperature')
            # Null-safe: an explicit `"partitions": null` must not break len()/sum().
            partitions = raw_disk.get('partitions') or []

            # The query above does not select interfaceType, smartStatus,
            # isSpinning, powerOnHours, reallocatedSectorCount or partitions,
            # so those entries stay None / 0 unless the selection is extended.
            # The keys are kept so the output shape does not change.
            summary = {
                'disk_id': raw_disk.get('id'),
                'device': raw_disk.get('device'),
                'name': raw_disk.get('name'),
                'serial_number': raw_disk.get('serialNum'),
                'size_formatted': _format_bytes(raw_disk.get('size')),
                # Compare against None so a reading of 0 is reported, not dropped.
                'temperature': f"{temperature}°C" if temperature is not None else 'N/A',
                'interface_type': raw_disk.get('interfaceType'),
                'smart_status': raw_disk.get('smartStatus'),
                'is_spinning': raw_disk.get('isSpinning'),
                'power_on_hours': raw_disk.get('powerOnHours'),
                'reallocated_sectors': raw_disk.get('reallocatedSectorCount'),
                'partition_count': len(partitions),
                'total_partition_size': _format_bytes(
                    sum(p.get('size', 0) for p in partitions if p.get('size'))
                ),
            }
            return {
                'summary': summary,
                'partitions': partitions,
                'details': raw_disk,
            }
        except Exception as e:
            logger.error(f"Error in get_disk_details for {disk_id}: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve disk details for {disk_id}: {str(e)}") from e

    logger.info("Storage tools registered successfully")

385
unraid_mcp/tools/system.py Normal file
View File

@@ -0,0 +1,385 @@
"""System information and array status tools.
This module provides tools for retrieving core Unraid system information,
array status with health analysis, network configuration, registration info,
and system variables.
"""
from typing import Any, Dict
from fastmcp import FastMCP
from ..config.logging import logger
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
# Standalone functions for use by subscription resources
async def _get_system_info() -> Dict[str, Any]:
    """Fetch system information from the Unraid API and attach a readable summary.

    Called by the ``get_system_info`` tool; the original note also names
    subscription resources as a caller.

    Returns:
        ``{"summary": ..., "details": ...}``. ``details`` is the raw ``info``
        object from the API. ``summary`` holds pre-formatted strings for the
        OS, CPU and memory layout, each present only when the API returned
        that section.

    Raises:
        ToolError: If the request fails, ``info`` is empty, or building the
            summary fails.
    """
    system_info_query = """
    query GetSystemInfo {
        info {
            os { platform distro release codename kernel arch hostname codepage logofile serial build uptime }
            cpu { manufacturer brand vendor family model stepping revision voltage speed speedmin speedmax threads cores processors socket cache flags }
            memory {
                # Avoid fetching problematic fields that cause type errors
                layout { bank type clockSpeed formFactor manufacturer partNum serialNum }
            }
            baseboard { manufacturer model version serial assetTag }
            system { manufacturer model version serial uuid sku }
            versions { kernel openssl systemOpenssl systemOpensslLib node v8 npm yarn pm2 gulp grunt git tsc mysql redis mongodb apache nginx php docker postfix postgresql perl python gcc unraid }
            apps { installed started }
            # Remove devices section as it has non-nullable fields that might be null
            machineId
            time
        }
    }
    """
    try:
        logger.info("Executing get_system_info")
        payload = await make_graphql_request(system_info_query)
        info = payload.get("info", {})
        if not info:
            raise ToolError("No system info returned from Unraid API")

        # Human-readable digest; key order matches the order sections are handled.
        digest = {}

        os_section = info.get('os')
        if os_section:
            digest['os'] = (
                f"{os_section.get('distro', '')} {os_section.get('release', '')} "
                f"({os_section.get('platform', '')}, {os_section.get('arch', '')})"
            )
            digest['hostname'] = os_section.get('hostname')
            digest['uptime'] = os_section.get('uptime')

        cpu_section = info.get('cpu')
        if cpu_section:
            digest['cpu'] = (
                f"{cpu_section.get('manufacturer', '')} {cpu_section.get('brand', '')} "
                f"({cpu_section.get('cores')} cores, {cpu_section.get('threads')} threads)"
            )

        memory_section = info.get('memory')
        sticks = memory_section.get('layout') if memory_section else None
        if sticks:
            # Per the original author's note, the API does not return a per-stick
            # size, so only descriptive fields are listed and no total is computed.
            digest['memory_layout_details'] = [
                f"Bank {stick.get('bank', '?')}: Type {stick.get('type', '?')}, "
                f"Speed {stick.get('clockSpeed', '?')}MHz, "
                f"Manufacturer: {stick.get('manufacturer','?')}, "
                f"Part: {stick.get('partNum', '?')}"
                for stick in sticks
            ]
            digest['memory_summary'] = "Stick layout details retrieved. Overall total/used/free memory stats are unavailable due to API limitations (Int overflow or data not provided by API)."
        else:
            digest['memory_summary'] = "Memory information (layout or stats) not available or failed to retrieve."

        # The raw object rides along so a consumer can dig deeper than the digest.
        return {"summary": digest, "details": info}
    except Exception as exc:
        logger.error(f"Error in get_system_info: {exc}", exc_info=True)
        raise ToolError(f"Failed to retrieve system information: {str(exc)}")
def _format_kb(k):
    """Format a kilobyte count as TB/GB/MB/KB text ("N/A" for None)."""
    if k is None:
        return "N/A"
    k = int(k)  # the original code notes these values can arrive as strings
    if k >= 1024 * 1024 * 1024:
        return f"{k / (1024 * 1024 * 1024):.2f} TB"
    if k >= 1024 * 1024:
        return f"{k / (1024 * 1024):.2f} GB"
    if k >= 1024:
        return f"{k / 1024:.2f} MB"
    return f"{k} KB"


def _analyze_disk_health(disks):
    """Count the disks in *disks* per health bucket; returns {} for an empty list."""
    if not disks:
        return {}
    health_counts = {
        'healthy': 0,
        'failed': 0,
        'missing': 0,
        'new': 0,
        'warning': 0,
        'unknown': 0,
    }
    for disk in disks:
        # `or ''` keeps a null status from crashing the whole report; it is
        # counted as 'unknown' instead.
        status = (disk.get('status') or '').upper()
        if status == 'DISK_OK':
            # NOTE(review): a truthy `warning`/`critical` is treated as "alert
            # active". If the API exposes these as configured thresholds, every
            # disk with thresholds set lands here — confirm against the schema.
            if disk.get('warning') or disk.get('critical'):
                health_counts['warning'] += 1
            else:
                health_counts['healthy'] += 1
        elif status in ('DISK_DSBL', 'DISK_INVALID'):
            health_counts['failed'] += 1
        elif status == 'DISK_NP':
            # NOTE(review): presumably "not present"; verify this is not also
            # the status of a deliberately unassigned slot.
            health_counts['missing'] += 1
        elif status == 'DISK_NEW':
            health_counts['new'] += 1
        else:
            health_counts['unknown'] += 1
    return health_counts


async def _get_array_status() -> Dict[str, Any]:
    """Fetch the array status from the Unraid API and attach a health summary.

    Called by the ``get_array_status`` tool; the original note also names
    subscription resources as a caller.

    Returns:
        ``{"summary": ..., "details": ...}``. ``details`` is the raw ``array``
        object. ``summary`` contains the array state, formatted capacity (when
        reported), disk counts, per-group health counts and an overall health
        label: CRITICAL (any failed disk), DEGRADED (any missing disk),
        WARNING (any warning) or HEALTHY.

    Raises:
        ToolError: If the request fails, ``array`` is empty, or summarising fails.
    """
    query = """
    query GetArrayStatus {
        array {
            id
            state
            capacity {
                kilobytes { free used total }
                disks { free used total }
            }
            boot { id idx name device size status rotational temp numReads numWrites numErrors fsSize fsFree fsUsed exportable type warning critical fsType comment format transport color }
            parities { id idx name device size status rotational temp numReads numWrites numErrors fsSize fsFree fsUsed exportable type warning critical fsType comment format transport color }
            disks { id idx name device size status rotational temp numReads numWrites numErrors fsSize fsFree fsUsed exportable type warning critical fsType comment format transport color }
            caches { id idx name device size status rotational temp numReads numWrites numErrors fsSize fsFree fsUsed exportable type warning critical fsType comment format transport color }
        }
    }
    """
    try:
        logger.info("Executing get_array_status")
        response_data = await make_graphql_request(query)
        raw_array_info = response_data.get("array") or {}
        if not raw_array_info:
            raise ToolError("No array information returned from Unraid API")

        summary = {'state': raw_array_info.get('state')}

        kb_cap = (raw_array_info.get('capacity') or {}).get('kilobytes')
        if kb_cap:
            summary['capacity_total'] = _format_kb(kb_cap.get('total'))
            summary['capacity_used'] = _format_kb(kb_cap.get('used'))
            summary['capacity_free'] = _format_kb(kb_cap.get('free'))

        # Null-safe: an explicit `null` list in the response counts as empty
        # rather than raising TypeError in len().
        parities = raw_array_info.get('parities') or []
        data_disks = raw_array_info.get('disks') or []
        caches = raw_array_info.get('caches') or []

        summary['num_parity_disks'] = len(parities)
        summary['num_data_disks'] = len(data_disks)
        summary['num_cache_pools'] = len(caches)  # Note: caches are pools, not individual cache disks

        # Per-group health; a group is only reported when it has disks.
        health_summary = {}
        if parities:
            health_summary['parity_health'] = _analyze_disk_health(parities)
        if data_disks:
            health_summary['data_health'] = _analyze_disk_health(data_disks)
        if caches:
            health_summary['cache_health'] = _analyze_disk_health(caches)

        # Overall assessment: the worst condition found in any group wins.
        total_failed = sum(h.get('failed', 0) for h in health_summary.values())
        total_missing = sum(h.get('missing', 0) for h in health_summary.values())
        total_warning = sum(h.get('warning', 0) for h in health_summary.values())
        if total_failed > 0:
            overall_health = "CRITICAL"
        elif total_missing > 0:
            overall_health = "DEGRADED"
        elif total_warning > 0:
            overall_health = "WARNING"
        else:
            overall_health = "HEALTHY"

        summary['overall_health'] = overall_health
        summary['health_summary'] = health_summary
        return {"summary": summary, "details": raw_array_info}
    except Exception as e:
        logger.error(f"Error in get_array_status: {e}", exc_info=True)
        raise ToolError(f"Failed to retrieve array status: {str(e)}") from e
def register_system_tools(mcp: FastMCP):
    """Register all system tools with the FastMCP instance.

    ``get_system_info`` and ``get_array_status`` delegate to the module-level
    ``_get_system_info`` / ``_get_array_status`` helpers. The remaining tools
    each send one GraphQL query through ``make_graphql_request`` and re-raise
    failures as ``ToolError`` with the original exception chained.

    Args:
        mcp: FastMCP instance to register tools with
    """

    @mcp.tool()
    async def get_system_info() -> Dict[str, Any]:
        """Retrieves comprehensive information about the Unraid system, OS, CPU, memory, and baseboard."""
        return await _get_system_info()

    @mcp.tool()
    async def get_array_status() -> Dict[str, Any]:
        """Retrieves the current status of the Unraid storage array, including its state, capacity, and details of all disks."""
        return await _get_array_status()

    @mcp.tool()
    async def get_network_config() -> Dict[str, Any]:
        """Retrieves network configuration details, including access URLs."""
        query = """
        query GetNetworkConfig {
            network {
                id
                accessUrls { type name ipv4 ipv6 }
            }
        }
        """
        try:
            logger.info("Executing get_network_config tool")
            response_data = await make_graphql_request(query)
            # `or {}` also covers an explicit `"network": null`, which
            # `.get("network", {})` would hand back as None.
            return response_data.get("network") or {}
        except Exception as e:
            logger.error(f"Error in get_network_config: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve network configuration: {str(e)}") from e

    @mcp.tool()
    async def get_registration_info() -> Dict[str, Any]:
        """Retrieves Unraid registration details."""
        # NOTE(review): `keyFile { contents }` is selected and returned to the
        # tool caller as-is — confirm that exposing it is intended.
        query = """
        query GetRegistrationInfo {
            registration {
                id
                type
                keyFile { location contents }
                state
                expiration
                updateExpiration
            }
        }
        """
        try:
            logger.info("Executing get_registration_info tool")
            response_data = await make_graphql_request(query)
            return response_data.get("registration") or {}
        except Exception as e:
            logger.error(f"Error in get_registration_info: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve registration information: {str(e)}") from e

    @mcp.tool()
    async def get_connect_settings() -> Dict[str, Any]:
        """Retrieves settings related to Unraid Connect."""
        # Based on actual schema: settings.unified.values contains the JSON settings
        query = """
        query GetConnectSettingsForm {
            settings {
                unified {
                    values
                }
            }
        }
        """
        try:
            logger.info("Executing get_connect_settings tool")
            response_data = await make_graphql_request(query)
            # Navigate down to the unified settings values; every level may be
            # missing or null, in which case the result is an empty dict.
            settings = response_data.get("settings") or {}
            unified = settings.get("unified") or {}
            values = unified.get("values") or {}
            if not isinstance(values, dict):
                # Non-dict payloads are passed through untouched.
                return values
            # Keep Connect-related keys only; fall back to the full set of
            # values when nothing matches.
            connect_settings = {
                key: value
                for key, value in values.items()
                if 'connect' in key.lower() or key in ('accessType', 'forwardType', 'port')
            }
            return connect_settings if connect_settings else values
        except Exception as e:
            logger.error(f"Error in get_connect_settings: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve Unraid Connect settings: {str(e)}") from e

    @mcp.tool()
    async def get_unraid_variables() -> Dict[str, Any]:
        """Retrieves a selection of Unraid system variables and settings.

        Note: Many variables are omitted due to API type issues (Int overflow/NaN).
        """
        # Querying a smaller, curated set of fields to avoid Int overflow and NaN issues
        # pending Unraid API schema fixes for the full Vars type.
        # NOTE(review): `csrfToken` is part of the selection and is returned to
        # the tool caller as-is — confirm that exposing it is intended.
        query = """
        query GetSelectiveUnraidVariables {
            vars {
                id
                version
                name
                timeZone
                comment
                security
                workgroup
                domain
                domainShort
                hideDotFiles
                localMaster
                enableFruit
                useNtp
                # ntpServer1, ntpServer2, ... are strings, should be okay but numerous
                domainLogin # Boolean
                sysModel # String
                # sysArraySlots, sysCacheSlots are Int, were problematic (NaN)
                sysFlashSlots # Int, might be okay if small and always set
                useSsl # Boolean
                port # Int, usually small
                portssl # Int, usually small
                localTld # String
                bindMgt # Boolean
                useTelnet # Boolean
                porttelnet # Int, usually small
                useSsh # Boolean
                portssh # Int, usually small
                startPage # String
                startArray # Boolean
                # spindownDelay, queueDepth are Int, potentially okay if always set
                # defaultFormat, defaultFsType are String
                shutdownTimeout # Int, potentially okay
                # luksKeyfile is String
                # pollAttributes, pollAttributesDefault, pollAttributesStatus are Int/String, were problematic (NaN or type)
                # nrRequests, nrRequestsDefault, nrRequestsStatus were problematic
                # mdNumStripes, mdNumStripesDefault, mdNumStripesStatus were problematic
                # mdSyncWindow, mdSyncWindowDefault, mdSyncWindowStatus were problematic
                # mdSyncThresh, mdSyncThreshDefault, mdSyncThreshStatus were problematic
                # mdWriteMethod, mdWriteMethodDefault, mdWriteMethodStatus were problematic
                # shareDisk, shareUser, shareUserInclude, shareUserExclude are String arrays/String
                shareSmbEnabled # Boolean
                shareNfsEnabled # Boolean
                shareAfpEnabled # Boolean
                # shareInitialOwner, shareInitialGroup are String
                shareCacheEnabled # Boolean
                # shareCacheFloor is String (numeric string?)
                # shareMoverSchedule, shareMoverLogging are String
                # fuseRemember, fuseRememberDefault, fuseRememberStatus are String/Boolean, were problematic
                # fuseDirectio, fuseDirectioDefault, fuseDirectioStatus are String/Boolean, were problematic
                shareAvahiEnabled # Boolean
                # shareAvahiSmbName, shareAvahiSmbModel, shareAvahiAfpName, shareAvahiAfpModel are String
                safeMode # Boolean
                startMode # String
                configValid # Boolean
                configError # String
                joinStatus # String
                deviceCount # Int, might be okay
                flashGuid # String
                flashProduct # String
                flashVendor # String
                # regCheck, regFile, regGuid, regTy, regState, regTo, regTm, regTm2, regGen are varied, mostly String/Int
                # sbName, sbVersion, sbUpdated, sbEvents, sbState, sbClean, sbSynced, sbSyncErrs, sbSynced2, sbSyncExit are varied
                # mdColor, mdNumDisks, mdNumDisabled, mdNumInvalid, mdNumMissing, mdNumNew, mdNumErased are Int, potentially okay if counts
                # mdResync, mdResyncCorr, mdResyncPos, mdResyncDb, mdResyncDt, mdResyncAction are varied (Int/Boolean/String)
                # mdResyncSize was an overflow
                mdState # String (enum)
                mdVersion # String
                # cacheNumDevices, cacheSbNumDisks were problematic (NaN)
                # fsState, fsProgress, fsCopyPrcnt, fsNumMounted, fsNumUnmountable, fsUnmountableMask are varied
                shareCount # Int, might be okay
                shareSmbCount # Int, might be okay
                shareNfsCount # Int, might be okay
                shareAfpCount # Int, might be okay
                shareMoverActive # Boolean
                csrfToken # String
            }
        }
        """
        try:
            logger.info("Executing get_unraid_variables tool with a selective query")
            response_data = await make_graphql_request(query)
            return response_data.get("vars") or {}
        except Exception as e:
            logger.error(f"Error in get_unraid_variables: {e}", exc_info=True)
            raise ToolError(f"Failed to retrieve Unraid variables: {str(e)}") from e

    logger.info("System tools registered successfully")

View File

@@ -0,0 +1,162 @@
"""Virtual machine management tools.
This module provides tools for VM lifecycle management and monitoring
including listing VMs, VM operations (start/stop/pause/reboot/etc),
and detailed VM information retrieval.
"""
from typing import Any, Dict, List
from fastmcp import FastMCP
from ..config.logging import logger
from ..core.client import make_graphql_request
from ..core.exceptions import ToolError
def register_vm_tools(mcp: FastMCP):
    """Register all VM tools with the FastMCP instance.

    Each tool sends one GraphQL request through ``make_graphql_request`` and
    re-raises failures as ``ToolError`` with the original exception chained.

    Args:
        mcp: FastMCP instance to register tools with
    """
    # Shown when the API reports that VM support is unavailable.
    vms_unavailable_message = (
        "VMs are not available on this Unraid server. This could mean: "
        "1) VM support is not enabled, 2) VM service is not running, or "
        "3) no VMs are configured. Check Unraid VM settings."
    )

    @mcp.tool()
    async def list_vms() -> List[Dict[str, Any]]:
        """Lists all Virtual Machines (VMs) on the Unraid system and their current state.

        Returns:
            List of VM information dictionaries with UUID, name, and state
        """
        query = """
        query ListVMs {
            vms {
                id
                domains {
                    id
                    name
                    state
                    uuid
                }
            }
        }
        """
        try:
            logger.info("Executing list_vms tool")
            response_data = await make_graphql_request(query)
            logger.info(f"VM query response: {response_data}")
            if response_data.get("vms") and response_data["vms"].get("domains"):
                vms = response_data["vms"]["domains"]
                logger.info(f"Found {len(vms)} VMs")
                return vms
            logger.info("No VMs found in domains field")
            return []
        except Exception as e:
            logger.error(f"Error in list_vms: {e}", exc_info=True)
            error_msg = str(e)
            if "VMs are not available" in error_msg:
                raise ToolError(vms_unavailable_message) from e
            raise ToolError(f"Failed to list virtual machines: {error_msg}") from e

    @mcp.tool()
    async def manage_vm(vm_uuid: str, action: str) -> Dict[str, Any]:
        """Manages a VM: start, stop, pause, resume, forceStop, reboot, reset. Uses VM UUID.

        Args:
            vm_uuid: UUID of the VM to manage
            action: Action to perform - one of: start, stop, pause, resume, forceStop, reboot, reset
                (force_stop is accepted as an alias for forceStop)

        Returns:
            Dict containing operation success status and details
        """
        valid_actions = ["start", "stop", "pause", "resume", "forceStop", "reboot", "reset"]
        # Earlier versions of this tool's description advertised `force_stop`,
        # which validation then rejected; accept it as an alias.
        action_aliases = {"force_stop": "forceStop"}
        mutation_name = action_aliases.get(action, action)
        if mutation_name not in valid_actions:
            logger.warning(f"Invalid action '{action}' for manage_vm")
            raise ToolError(f"Invalid action. Must be one of {valid_actions}.")

        # Interpolating the mutation name is safe here: it was just checked
        # against the fixed allow-list above.
        query = f"""
        mutation ManageVM($id: PrefixedID!) {{
            vm {{
                {mutation_name}(id: $id)
            }}
        }}
        """
        variables = {"id": vm_uuid}
        try:
            logger.info(f"Executing manage_vm tool: action={action}, uuid={vm_uuid}")
            response_data = await make_graphql_request(query, variables)
            if response_data.get("vm") and mutation_name in response_data["vm"]:
                # Mutations for VM return Boolean for success
                success = response_data["vm"][mutation_name]
                return {"success": success, "action": action, "vm_uuid": vm_uuid}
            raise ToolError(f"Failed to {action} VM or unexpected response structure.")
        except Exception as e:
            logger.error(f"Error in manage_vm ({action}): {e}", exc_info=True)
            raise ToolError(f"Failed to {action} virtual machine: {str(e)}") from e

    @mcp.tool()
    async def get_vm_details(vm_identifier: str) -> Dict[str, Any]:
        """Retrieves detailed information for a specific VM by its UUID or name.

        Args:
            vm_identifier: VM UUID or name to retrieve details for

        Returns:
            Dict containing detailed VM information
        """
        # Make direct GraphQL call instead of calling list_vms() tool
        query = """
        query GetVmDetails {
            vms {
                domains {
                    id
                    name
                    state
                    uuid
                }
                domain {
                    id
                    name
                    state
                    uuid
                }
            }
        }
        """
        try:
            logger.info(f"Executing get_vm_details for identifier: {vm_identifier}")
            response_data = await make_graphql_request(query)

            vms_data = response_data.get("vms")
            if not vms_data:
                raise ToolError("No VMs data returned from server")

            # Try to get VMs from either domains or domain field
            vms = vms_data.get("domains") or vms_data.get("domain") or []
            if not vms:
                raise ToolError("No VMs available or VMs not accessible")

            # The identifier may be the VM's uuid, its id, or its name.
            for vm_data in vms:
                if vm_identifier in (vm_data.get("uuid"), vm_data.get("id"), vm_data.get("name")):
                    logger.info(f"Found VM {vm_identifier}")
                    return vm_data

            logger.warning(f"VM with identifier '{vm_identifier}' not found.")
            available_vms = [
                f"{vm.get('name')} (UUID: {vm.get('uuid')}, ID: {vm.get('id')})" for vm in vms
            ]
            raise ToolError(f"VM '{vm_identifier}' not found. Available VMs: {', '.join(available_vms)}")
        except Exception as e:
            logger.error(f"Error in get_vm_details: {e}", exc_info=True)
            error_msg = str(e)
            if "VMs are not available" in error_msg:
                raise ToolError(vms_unavailable_message) from e
            raise ToolError(f"Failed to retrieve VM details: {error_msg}") from e

    logger.info("VM tools registered successfully")