forked from HomeLab/unraid-mcp
Remove unused MCP resources and update documentation
- Remove array_status, system_info, notifications_overview, and parity_status resources
- Keep only logs_stream resource (unraid://logs/stream) which is working properly
- Update README.md with current resource documentation and modern docker compose syntax
- Fix import path issues that were causing subscription errors
- Update environment configuration examples
- Clean up subscription manager to only include working log streaming

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
187
unraid_mcp/tools/health.py
Normal file
187
unraid_mcp/tools/health.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""Comprehensive health monitoring tools.
|
||||
|
||||
This module provides tools for comprehensive health checks of the Unraid MCP server
|
||||
and the underlying Unraid system, including performance metrics, system status,
|
||||
notifications, Docker services, and API responsiveness.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import time
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from ..config.logging import logger
|
||||
from ..config.settings import UNRAID_API_URL, UNRAID_MCP_HOST, UNRAID_MCP_PORT, UNRAID_MCP_TRANSPORT
|
||||
from ..core.client import make_graphql_request
|
||||
from ..core.exceptions import ToolError
|
||||
|
||||
|
||||
# Health states ordered from least to most severe. Used so that a later,
# milder finding never masks an earlier, more serious one.
_HEALTH_SEVERITY = {"healthy": 0, "warning": 1, "degraded": 2, "unhealthy": 3}

# Array states that are considered normal operation.
_HEALTHY_ARRAY_STATES = ("STARTED", "STOPPED")

# API latency thresholds, in milliseconds.
_LATENCY_WARNING_MS = 5000
_LATENCY_DEGRADED_MS = 10000


def _escalate(current: str, candidate: str) -> str:
    """Return whichever of the two health states is more severe."""
    if _HEALTH_SEVERITY.get(candidate, 0) > _HEALTH_SEVERITY.get(current, 0):
        return candidate
    return current


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same format as ``datetime.utcnow().isoformat()`` without
    calling the deprecated ``utcnow``.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.replace(tzinfo=None).isoformat()


def register_health_tools(mcp: FastMCP) -> None:
    """Register all health tools with the FastMCP instance.

    Args:
        mcp: FastMCP instance to register tools with
    """
    # Captured once at registration so the reported uptime grows over the
    # life of the server instead of restarting on every health check.
    registered_at = time.monotonic()

    def server_info() -> Dict[str, Any]:
        """Build the static description of this MCP server."""
        return {
            "name": "Unraid MCP Server",
            "version": "0.1.0",
            "transport": UNRAID_MCP_TRANSPORT,
            "host": UNRAID_MCP_HOST,
            "port": UNRAID_MCP_PORT,
        }

    @mcp.tool()
    async def health_check() -> Dict[str, Any]:
        """Returns comprehensive health status of the Unraid MCP server and system for monitoring purposes."""
        start_time = time.monotonic()
        health_status = "healthy"
        issues = []

        try:
            # Enhanced health check with multiple system components
            comprehensive_query = """
            query ComprehensiveHealthCheck {
                info {
                    machineId
                    time
                    versions { unraid }
                    os { uptime }
                }
                array {
                    state
                }
                notifications {
                    overview {
                        unread { alert warning total }
                    }
                }
                docker {
                    containers(skipCache: true) {
                        id
                        state
                        status
                    }
                }
            }
            """

            response_data = await make_graphql_request(comprehensive_query)
            api_latency = round((time.monotonic() - start_time) * 1000, 2)  # ms

            # Base health info
            server = server_info()
            server["process_uptime_seconds"] = time.monotonic() - registered_at
            health_info = {
                "status": health_status,
                "timestamp": _utc_timestamp(),
                "api_latency_ms": api_latency,
                "server": server,
            }

            if not response_data:
                issues.append("No response from Unraid API")
                health_info["status"] = "unhealthy"
                health_info["issues"] = issues
                return health_info

            # System info analysis. Nested objects may be null in the
            # response, so fall back to empty dicts before chaining .get().
            info = response_data.get("info") or {}
            if info:
                health_info["unraid_system"] = {
                    "status": "connected",
                    "url": UNRAID_API_URL,
                    "machine_id": info.get("machineId"),
                    "time": info.get("time"),
                    "version": (info.get("versions") or {}).get("unraid"),
                    "uptime": (info.get("os") or {}).get("uptime"),
                }
            else:
                health_status = _escalate(health_status, "degraded")
                issues.append("Unable to retrieve system info")

            # Array health analysis
            array_info = response_data.get("array") or {}
            if array_info:
                array_state = array_info.get("state") or "unknown"
                array_ok = array_state in _HEALTHY_ARRAY_STATES
                health_info["array_status"] = {
                    "state": array_state,
                    "healthy": array_ok,
                }
                if not array_ok:
                    health_status = _escalate(health_status, "warning")
                    issues.append(f"Array in unexpected state: {array_state}")
            else:
                health_status = _escalate(health_status, "warning")
                issues.append("Unable to retrieve array status")

            # Notifications analysis
            notifications = response_data.get("notifications") or {}
            overview = notifications.get("overview")
            if overview:
                unread = overview.get("unread") or {}
                # Treat null counts as zero so the comparisons below are safe.
                alert_count = unread.get("alert") or 0
                warning_count = unread.get("warning") or 0
                total_unread = unread.get("total") or 0

                health_info["notifications"] = {
                    "unread_total": total_unread,
                    "unread_alerts": alert_count,
                    "unread_warnings": warning_count,
                    "has_critical_notifications": alert_count > 0,
                }

                if alert_count > 0:
                    health_status = _escalate(health_status, "warning")
                    issues.append(f"{alert_count} unread alert notification(s)")

            # Docker services analysis
            docker_info = response_data.get("docker") or {}
            containers = docker_info.get("containers")
            if containers:
                running = sum(1 for c in containers if c.get("state") == "running")
                stopped = sum(1 for c in containers if c.get("state") == "exited")
                healthy = sum(
                    1 for c in containers
                    if (c.get("status") or "").startswith("Up")
                )
                health_info["docker_services"] = {
                    "total_containers": len(containers),
                    "running_containers": running,
                    "stopped_containers": stopped,
                    "containers_healthy": healthy,
                }

            # API performance assessment. The higher threshold must be
            # tested first, otherwise the "degraded" branch is unreachable.
            if api_latency > _LATENCY_DEGRADED_MS:  # > 10 seconds
                health_status = _escalate(health_status, "degraded")
                issues.append(f"Very high API latency: {api_latency}ms")
            elif api_latency > _LATENCY_WARNING_MS:  # > 5 seconds
                health_status = _escalate(health_status, "warning")
                issues.append(f"High API latency: {api_latency}ms")

            # Final status determination
            health_info["status"] = health_status
            if issues:
                health_info["issues"] = issues

            # Add performance metrics
            health_info["performance"] = {
                "api_response_time_ms": api_latency,
                "health_check_duration_ms": round(
                    (time.monotonic() - start_time) * 1000, 2
                ),
            }

            return health_info

        except Exception as e:
            # Top-level boundary: a health check must always return a report
            # rather than raise, so any failure is reported as "unhealthy".
            logger.error(f"Health check failed: {e}")
            return {
                "status": "unhealthy",
                "timestamp": _utc_timestamp(),
                "error": str(e),
                "api_latency_ms": round((time.monotonic() - start_time) * 1000, 2),
                "server": server_info(),
            }

    logger.info("Health tools registered successfully")
|
||||
Reference in New Issue
Block a user