diff --git a/ai_software_factory/agents/database_manager.py b/ai_software_factory/agents/database_manager.py index 7936140..67a1dcd 100644 --- a/ai_software_factory/agents/database_manager.py +++ b/ai_software_factory/agents/database_manager.py @@ -2503,6 +2503,7 @@ class DatabaseManager: archived_project_bundles = [self.get_project_audit_data(project.project_id) for project in archived_projects[:limit]] all_project_bundles = [self.get_project_audit_data(project.project_id) for project in active_projects] all_project_bundles.extend(self.get_project_audit_data(project.project_id) for project in archived_projects) + recent_llm_traces = self.get_llm_traces(limit=limit * 20) system_logs = self.db.query(SystemLog).order_by(SystemLog.created_at.desc()).limit(limit).all() return { "summary": { @@ -2532,6 +2533,7 @@ class DatabaseManager: } for log in system_logs ], + "recent_llm_traces": recent_llm_traces, "lineage_links": self.get_prompt_change_links(limit=limit * 10), "correlations": self.get_prompt_change_correlations(limit=limit), "prompt_queue": { diff --git a/ai_software_factory/agents/llm_service.py b/ai_software_factory/agents/llm_service.py index 08143ef..79ae10c 100644 --- a/ai_software_factory/agents/llm_service.py +++ b/ai_software_factory/agents/llm_service.py @@ -399,6 +399,7 @@ class LLMServiceClient: 'provider': 'ollama', 'ollama_url': self.ollama_url, 'model': self.model, + 'request_timeout_seconds': self.request_timeout_seconds, 'guardrails': { 'global': settings.llm_guardrail_prompt, 'request_interpretation': settings.llm_request_interpreter_guardrail_prompt, diff --git a/ai_software_factory/agents/request_interpreter.py b/ai_software_factory/agents/request_interpreter.py index 129e185..721cfb4 100644 --- a/ai_software_factory/agents/request_interpreter.py +++ b/ai_software_factory/agents/request_interpreter.py @@ -15,6 +15,14 @@ except ImportError: from agents.llm_service import LLMServiceClient +class RequestInterpretationError(RuntimeError): + """Raised when one LLM-driven request interpretation stage fails.""" + + def __init__(self, message: str, *, trace: dict | None = None): + super().__init__(message) + self.trace = trace or {} + + class RequestInterpreter: """Use Ollama to turn free-form text into a structured software request.""" @@ -91,26 +99,37 @@ class RequestInterpreter: if not content: detail = self.llm_client.extract_error_message(trace) if detail: - raise RuntimeError(f'LLM request interpretation failed: {detail}') - raise RuntimeError('LLM request interpretation did not return a usable response.') + raise RequestInterpretationError(f'LLM request interpretation failed: {detail}', trace=trace) + raise RequestInterpretationError('LLM request interpretation did not return a usable response.', trace=trace) try: parsed = json.loads(content) except Exception as exc: - raise RuntimeError('LLM request interpretation did not return valid JSON.') from exc + raise RequestInterpretationError('LLM request interpretation did not return valid JSON.', trace=trace) from exc - interpreted = self._normalize_interpreted_request(parsed) - routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context) + try: + interpreted = self._normalize_interpreted_request(parsed) + routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context) + except RuntimeError as exc: + raise RequestInterpretationError(str(exc), trace=trace) from exc if routing.get('intent') == 'continue_project' and routing.get('project_name'): interpreted['name'] = routing['project_name'] naming_trace = None if routing.get('intent') == 'new_project': - interpreted, routing, naming_trace = await self._refine_new_project_identity( - prompt_text=normalized, - interpreted=interpreted, - routing=routing, - context=compact_context, - ) + try: + interpreted, routing, naming_trace = await self._refine_new_project_identity( + prompt_text=normalized, + interpreted=interpreted, + routing=routing, + context=compact_context, + ) + except RequestInterpretationError as exc: + combined_trace = dict(trace) + combined_trace['routing'] = routing + combined_trace['context_excerpt'] = compact_context + if exc.trace: + combined_trace['project_naming'] = exc.trace + raise RequestInterpretationError(str(exc), trace=combined_trace) from exc trace['routing'] = routing trace['context_excerpt'] = compact_context if naming_trace is not None: @@ -146,15 +165,18 @@ class RequestInterpreter: if not content: detail = self.llm_client.extract_error_message(trace) if detail: - raise RuntimeError(f'LLM project naming failed: {detail}') - raise RuntimeError('LLM project naming did not return a usable response.') + raise RequestInterpretationError(f'LLM project naming failed: {detail}', trace=trace) + raise RequestInterpretationError('LLM project naming did not return a usable response.', trace=trace) try: parsed = json.loads(content) except Exception as exc: - raise RuntimeError('LLM project naming did not return valid JSON.') from exc + raise RequestInterpretationError('LLM project naming did not return valid JSON.', trace=trace) from exc - project_name, repo_name = self._normalize_project_identity(parsed) + try: + project_name, repo_name = self._normalize_project_identity(parsed) + except RuntimeError as exc: + raise RequestInterpretationError(str(exc), trace=trace) from exc repo_name = self._ensure_unique_repo_name(repo_name, constraints['repo_names']) interpreted['name'] = project_name routing['project_name'] = project_name @@ -368,8 +390,30 @@ class RequestInterpreter: def _normalize_project_identity(self, payload: dict) -> tuple[str, str]: """Validate model-proposed project and repository naming.""" - project_candidate = str(payload.get('project_name') or payload.get('name') or '').strip() - repo_candidate = str(payload.get('repo_name') or '').strip() + project_payload = payload.get('project') if isinstance(payload.get('project'), dict) else {} + repository_payload = payload.get('repository') if isinstance(payload.get('repository'), dict) else {} + project_candidate = str( + payload.get('project_name') + or payload.get('name') + or payload.get('title') + or payload.get('display_name') + or project_payload.get('project_name') + or project_payload.get('name') + or project_payload.get('title') + or project_payload.get('display_name') + or '' + ).strip() + repo_candidate = str( + payload.get('repo_name') + or payload.get('repo') + or payload.get('slug') + or payload.get('repository_name') + or payload.get('repository_slug') + or repository_payload.get('repo_name') + or repository_payload.get('name') + or repository_payload.get('slug') + or '' + ).strip() if not project_candidate: raise RuntimeError('LLM project naming did not provide a project name.') if not repo_candidate: diff --git a/ai_software_factory/dashboard_ui.py b/ai_software_factory/dashboard_ui.py index 91cec6f..8c574ee 100644 --- a/ai_software_factory/dashboard_ui.py +++ b/ai_software_factory/dashboard_ui.py @@ -545,6 +545,52 @@ def _render_change_list(changes: list[dict]) -> None: _render_side_by_side_diff(change['diff_text']) +def _extract_llm_trace_error(trace: dict) -> str | None: + """Extract one useful failure message from a persisted LLM trace.""" + if not isinstance(trace, dict): + return None + raw_response = trace.get('raw_response') if isinstance(trace.get('raw_response'), dict) else {} + provider_trace = raw_response.get('provider_trace') if isinstance(raw_response.get('provider_trace'), dict) else {} + provider_response = raw_response.get('provider_response') if isinstance(raw_response.get('provider_response'), dict) else {} + nested_provider_response = provider_trace.get('provider_response') if isinstance(provider_trace.get('provider_response'), dict) else {} + candidates = [ + raw_response.get('failure_message'), + raw_response.get('error'), + provider_response.get('error'), + provider_trace.get('error'), + nested_provider_response.get('error'), + ] + for candidate in candidates: + if candidate: + return str(candidate) + return None + + +def _build_llm_insights(traces: list[dict]) -> dict: + """Summarize recent LLM activity for dashboard visibility.""" + normalized = [trace for trace in traces if isinstance(trace, dict)] + by_stage: dict[str, int] = {} + error_by_stage: dict[str, int] = {} + recent_errors: list[dict] = [] + fallback_traces = 0 + for trace in normalized: + stage = str(trace.get('stage') or 'unknown') + by_stage[stage] = by_stage.get(stage, 0) + 1 + if trace.get('fallback_used'): + fallback_traces += 1 + if _extract_llm_trace_error(trace): + error_by_stage[stage] = error_by_stage.get(stage, 0) + 1 + recent_errors.append(trace) + return { + 'total_traces': len(normalized), + 'error_traces': len(recent_errors), + 'fallback_traces': fallback_traces, + 'by_stage': by_stage, + 'error_by_stage': error_by_stage, + 'recent_errors': recent_errors[:8], + } + + def _render_llm_traces(traces: list[dict]) -> None: """Render persisted LLM request/response traces for a prompt.""" if not traces: @@ -560,12 +606,17 @@ def _render_llm_traces(traces: list[dict]) -> None: ui.label(f'{provider}:{model}').classes('factory-chip') if trace.get('fallback_used'): ui.label('Fallback path used').classes('factory-chip') + error_message = _extract_llm_trace_error(trace) + if error_message: + ui.label(error_message).classes('factory-chip') with ui.expansion('System prompt').classes('w-full q-mt-sm'): ui.label(trace.get('system_prompt') or 'No system prompt recorded').classes('factory-code') with ui.expansion('User prompt').classes('w-full q-mt-sm'): ui.label(trace.get('user_prompt') or 'No user prompt recorded').classes('factory-code') with ui.expansion('Assistant response').classes('w-full q-mt-sm'): ui.label(trace.get('assistant_response') or 'No assistant response recorded').classes('factory-code') + with ui.expansion('Raw provider response').classes('w-full q-mt-sm'): + ui.label(json.dumps(trace.get('raw_response'), indent=2, sort_keys=True) if trace.get('raw_response') is not None else 'No raw response recorded').classes('factory-code') def _filter_llm_traces(traces: list[dict], stage: str, model: str, search_query: str) -> list[dict]: @@ -1473,6 +1524,7 @@ def create_dashboard(): } projects = snapshot['projects'] all_llm_traces = [trace for project_bundle in projects for trace in project_bundle.get('llm_traces', [])] + recent_llm_traces = snapshot.get('recent_llm_traces', []) return { 'snapshot': snapshot, 'summary': snapshot['summary'], @@ -1490,6 +1542,8 @@ def create_dashboard(): 'discovered_repositories': discovered_repositories, 'prompt_settings': prompt_settings, 'runtime_settings': runtime_settings, + 'recent_llm_traces': recent_llm_traces, + 'llm_insights': _build_llm_insights(recent_llm_traces), 'llm_stage_options': [''] + sorted({trace.get('stage') for trace in all_llm_traces if trace.get('stage')}), 'llm_model_options': [''] + sorted({trace.get('model') for trace in all_llm_traces if trace.get('model')}), 'project_repository_map': { @@ -1954,6 +2008,7 @@ def create_dashboard(): return system_logs = view_model['system_logs'] llm_runtime = view_model['llm_runtime'] + llm_insights = view_model.get('llm_insights', {}) discovered_repositories = view_model['discovered_repositories'] prompt_settings = view_model.get('prompt_settings', []) runtime_settings = view_model.get('runtime_settings', []) @@ -1971,6 +2026,7 @@ def create_dashboard(): ('Provider', llm_runtime.get('provider')), ('Model', llm_runtime.get('model')), ('Ollama URL', llm_runtime.get('ollama_url')), + ('Request Timeout', str(llm_runtime.get('request_timeout_seconds') or 'n/a')), ('Tool Context Limit', str(llm_runtime.get('tool_context_limit'))), ('Max Tool Call Rounds', str(llm_runtime.get('max_tool_call_rounds'))), ('Live Gitea Tools Configured', 'yes' if llm_runtime.get('gitea_live_tools_configured') else 'no'), @@ -2007,6 +2063,25 @@ def create_dashboard(): for label, text in system_prompts.items(): ui.label(label.replace('_', ' ').title()).classes('factory-muted q-mt-sm') ui.label(text or 'Not configured').classes('factory-code') + with ui.card().classes('factory-panel q-pa-lg'): + ui.label('LLM Insights').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;') + for label, value in [ + ('Recent Traces', llm_insights.get('total_traces', 0)), + ('Recent Errors', llm_insights.get('error_traces', 0)), + ('Fallback Traces', llm_insights.get('fallback_traces', 0)), + ]: + with ui.row().classes('justify-between w-full q-mt-sm'): + ui.label(label).classes('factory-muted') + ui.label(str(value)).style('font-weight: 600; color: #3a281a;') + if llm_insights.get('by_stage'): + ui.label('Trace Volume By Stage').style('font-weight: 700; color: #3a281a; margin-top: 12px;') + for stage_name, count in sorted(llm_insights.get('by_stage', {}).items()): + error_count = llm_insights.get('error_by_stage', {}).get(stage_name, 0) + ui.markdown(f"- **{stage_name}**: {count} trace(s), {error_count} error(s)") + if llm_insights.get('recent_errors'): + ui.label('Recent LLM Errors').style('font-weight: 700; color: #3a281a; margin-top: 12px;') + for trace in llm_insights.get('recent_errors', []): + ui.markdown(f"- **{trace.get('stage') or 'llm'}** · {trace.get('timestamp') or 'n/a'} · {escape(_extract_llm_trace_error(trace) or 'Unknown error')}") with ui.card().classes('factory-panel q-pa-lg'): ui.label('Home Assistant and Queue Settings').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;') ui.label('Keep only the Home Assistant base URL and access token in the environment. Entity ids, thresholds, and queue behavior are edited here and persisted in the database.').classes('factory-muted') diff --git a/ai_software_factory/main.py b/ai_software_factory/main.py index 25d246a..fc80db3 100644 --- a/ai_software_factory/main.py +++ b/ai_software_factory/main.py @@ -31,7 +31,7 @@ try: from .agents.change_summary import ChangeSummaryGenerator from .agents.database_manager import DatabaseManager from .agents.home_assistant import HomeAssistantAgent - from .agents.request_interpreter import RequestInterpreter + from .agents.request_interpreter import RequestInterpreter, RequestInterpretationError from .agents.llm_service import LLMServiceClient from .agents.orchestrator import AgentOrchestrator from .agents.n8n_setup import N8NSetupAgent @@ -45,7 +45,7 @@ except ImportError: from agents.change_summary import ChangeSummaryGenerator from agents.database_manager import DatabaseManager from agents.home_assistant import HomeAssistantAgent - from agents.request_interpreter import RequestInterpreter + from agents.request_interpreter import RequestInterpreter, RequestInterpretationError from agents.llm_service import LLMServiceClient from agents.orchestrator import AgentOrchestrator from agents.n8n_setup import N8NSetupAgent @@ -80,6 +80,7 @@ app = FastAPI(lifespan=lifespan) DbSession = Annotated[Session, Depends(database_module.get_db)] PROJECT_ID_PATTERN = re.compile(r"[^a-z0-9]+") +UNASSIGNED_LLM_TRACE_PROJECT_ID = '__unassigned__' class SoftwareRequest(BaseModel): @@ -673,6 +674,48 @@ async def _interpret_freeform_request(request: FreeformSoftwareRequest, manager: return SoftwareRequest(**interpreted), routing, interpretation_trace +def _persist_llm_trace(manager: DatabaseManager, *, project_id: str, trace: dict, prompt_id: int | None = None, history_id: int | None = None) -> None: + """Persist one LLM trace payload when enough metadata is available.""" + if not isinstance(trace, dict) or not trace.get('stage'): + return + manager.log_llm_trace( + project_id=project_id, + history_id=history_id, + prompt_id=prompt_id, + stage=trace['stage'], + provider=trace.get('provider') or 'unknown', + model=trace.get('model') or 'unknown', + system_prompt=trace.get('system_prompt') or '', + user_prompt=trace.get('user_prompt') or '', + assistant_response=trace.get('assistant_response') or '', + raw_response=trace.get('raw_response'), + fallback_used=trace.get('fallback_used', False), + ) + + +def _persist_failed_freeform_llm_traces(manager: DatabaseManager, *, request: FreeformSoftwareRequest, error: RequestInterpretationError) -> None: + """Persist failed interpretation traces under an unassigned bucket for dashboard inspection.""" + trace = error.trace if isinstance(error.trace, dict) else {} + if not trace: + return + base_trace = dict(trace) + base_trace['raw_response'] = { + 'failure_message': str(error), + 'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type}, + 'provider_trace': trace.get('raw_response'), + } + _persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=base_trace) + naming_trace = trace.get('project_naming') if isinstance(trace.get('project_naming'), dict) else None + if naming_trace: + enriched_naming_trace = dict(naming_trace) + enriched_naming_trace['raw_response'] = { + 'failure_message': str(error), + 'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type}, + 'provider_trace': naming_trace.get('raw_response'), + } + _persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=enriched_naming_trace) + + async def _run_freeform_generation( request: FreeformSoftwareRequest, db: Session, @@ -703,33 +746,21 @@ async def _run_freeform_generation( manager = DatabaseManager(db) prompts = manager.get_prompt_events(project_id=project_data.get('project_id')) prompt_id = prompts[0]['id'] if prompts else None - manager.log_llm_trace( + _persist_llm_trace( + manager, project_id=project_data.get('project_id'), history_id=project_data.get('history_id'), prompt_id=prompt_id, - stage=interpretation_trace['stage'], - provider=interpretation_trace['provider'], - model=interpretation_trace['model'], - system_prompt=interpretation_trace['system_prompt'], - user_prompt=interpretation_trace['user_prompt'], - assistant_response=interpretation_trace['assistant_response'], - raw_response=interpretation_trace.get('raw_response'), - fallback_used=interpretation_trace.get('fallback_used', False), + trace=interpretation_trace, ) naming_trace = interpretation_trace.get('project_naming') if naming_trace: - manager.log_llm_trace( + _persist_llm_trace( + manager, project_id=project_data.get('project_id'), history_id=project_data.get('history_id'), prompt_id=prompt_id, - stage=naming_trace['stage'], - provider=naming_trace['provider'], - model=naming_trace['model'], - system_prompt=naming_trace['system_prompt'], - user_prompt=naming_trace['user_prompt'], - assistant_response=naming_trace['assistant_response'], - raw_response=naming_trace.get('raw_response'), - fallback_used=naming_trace.get('fallback_used', False), + trace=naming_trace, ) response['interpreted_request'] = structured_request.model_dump() response['routing'] = routing @@ -749,6 +780,11 @@ async def _run_freeform_generation( }, ) return response + except RequestInterpretationError as exc: + _persist_failed_freeform_llm_traces(manager, request=request, error=exc) + if queue_item_id is not None: + DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc)) + raise except Exception as exc: if queue_item_id is not None: DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc))