release: version 0.9.16 🚀

fix: better LLM failure tracing, refs NOISSUE
release: version 0.9.15 🚀
2026-04-11 22:38:41 +02:00 · 2026-04-11 22:38:38 +02:00 · 2026-04-11 22:20:14 +02:00 · 2026-04-11 22:19:42 +02:00 · 2026-04-11 21:40:53 +02:00 · 2026-04-11 21:40:50 +02:00
9 changed files with 589 additions and 43 deletions
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -5,11 +5,56 @@ Changelog
 (unreleased)
 ------------
 Fix
 ~~~
 - Better LLM failure tracing, refs NOISSUE. [Simon Diesenreiter]
 0.9.15 (2026-04-11)
 -------------------
 Fix
 ~~~
 - Increase LLM timeouts, refs NOISSUE. [Simon Diesenreiter]
 Other
 ~~~~~
 0.9.14 (2026-04-11)
 -------------------
 Fix
 ~~~
 - Add Ollama connection health details in UI, refs NOISSUE. [Simon
  Diesenreiter]
 Other
 ~~~~~
 0.9.13 (2026-04-11)
 -------------------
 Fix
 ~~~
 - Fix internal server error, refs NOISSUE. [Simon Diesenreiter]
 Other
 ~~~~~
 0.9.12 (2026-04-11)
 -------------------
 Fix
 ~~~
 - Remove heuristic decision making fallbacks, refs NOISSUE. [Simon
  Diesenreiter]
 Other
 ~~~~~
 0.9.11 (2026-04-11)
 -------------------
--- a/ai_software_factory/VERSION
+++ b/ai_software_factory/VERSION
@@ -1 +1 @@
-0.9.12
+0.9.16
--- a/ai_software_factory/agents/database_manager.py
+++ b/ai_software_factory/agents/database_manager.py
@@ -2503,6 +2503,7 @@ class DatabaseManager:
        archived_project_bundles = [self.get_project_audit_data(project.project_id) for project in archived_projects[:limit]]
        all_project_bundles = [self.get_project_audit_data(project.project_id) for project in active_projects]
        all_project_bundles.extend(self.get_project_audit_data(project.project_id) for project in archived_projects)
        recent_llm_traces = self.get_llm_traces(limit=limit * 20)
        system_logs = self.db.query(SystemLog).order_by(SystemLog.created_at.desc()).limit(limit).all()
        return {
            "summary": {
@@ -2532,6 +2533,7 @@ class DatabaseManager:
                }
                for log in system_logs
            ],
            "recent_llm_traces": recent_llm_traces,
            "lineage_links": self.get_prompt_change_links(limit=limit * 10),
            "correlations": self.get_prompt_change_correlations(limit=limit),
            "prompt_queue": {
--- a/ai_software_factory/agents/llm_service.py
+++ b/ai_software_factory/agents/llm_service.py
@@ -3,6 +3,8 @@
 from __future__ import annotations
 import json
 from urllib import error as urllib_error
 from urllib import request as urllib_request
 try:
    from .gitea import GiteaAPI
@@ -183,6 +185,7 @@ class LLMServiceClient:
    def __init__(self, ollama_url: str | None = None, model: str | None = None):
        self.ollama_url = (ollama_url or settings.ollama_url).rstrip('/')
        self.model = model or settings.OLLAMA_MODEL
        self.request_timeout_seconds = settings.llm_request_timeout_seconds
        self.toolbox = LLMToolbox()
        self.live_tool_executor = LLMLiveToolExecutor()
@@ -288,15 +291,39 @@ class LLMServiceClient:
        try:
            import aiohttp
-            async with aiohttp.ClientSession() as session:
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.request_timeout_seconds)) as session:
                async with session.post(f'{self.ollama_url}/api/chat', json=request_payload) as resp:
                    payload = await resp.json()
                    if 200 <= resp.status < 300:
                        return (payload.get('message') or {}).get('content', ''), payload, None
                    return None, payload, str(payload.get('error') or payload)
        except Exception as exc:
            if exc.__class__.__name__ == 'TimeoutError':
                message = f'LLM request timed out after {self.request_timeout_seconds} seconds'
                return None, {'error': message}, message
            return None, {'error': str(exc)}, str(exc)
    @staticmethod
    def extract_error_message(trace: dict | None) -> str | None:
        """Extract the most useful provider error message from a trace payload."""
        if not isinstance(trace, dict):
            return None
        raw_response = trace.get('raw_response') if isinstance(trace.get('raw_response'), dict) else {}
        provider_response = raw_response.get('provider_response') if isinstance(raw_response.get('provider_response'), dict) else {}
        candidate_errors = [
            provider_response.get('error'),
            raw_response.get('error'),
            trace.get('error'),
        ]
        raw_responses = trace.get('raw_responses') if isinstance(trace.get('raw_responses'), list) else []
        for payload in reversed(raw_responses):
            if isinstance(payload, dict) and payload.get('error'):
                candidate_errors.append(payload.get('error'))
        for candidate in candidate_errors:
            if candidate:
                return str(candidate).strip()
        return None
    def _compose_system_prompt(self, stage: str, stage_prompt: str) -> str:
        """Merge the stage prompt with configured guardrails."""
        sections = [stage_prompt.strip()] + self._guardrail_sections(stage)
@@ -372,6 +399,7 @@ class LLMServiceClient:
            'provider': 'ollama',
            'ollama_url': self.ollama_url,
            'model': self.model,
            'request_timeout_seconds': self.request_timeout_seconds,
            'guardrails': {
                'global': settings.llm_guardrail_prompt,
                'request_interpretation': settings.llm_request_interpreter_guardrail_prompt,
@@ -392,3 +420,117 @@ class LLMServiceClient:
            'max_tool_call_rounds': settings.llm_max_tool_call_rounds,
            'gitea_live_tools_configured': bool(settings.gitea_url and settings.gitea_token),
        }
    def health_check_sync(self) -> dict:
        """Synchronously check Ollama reachability and configured model availability."""
        if not self.ollama_url:
            return {
                'status': 'error',
                'message': 'OLLAMA_URL is not configured.',
                'ollama_url': 'Not configured',
                'model': self.model,
                'checks': [],
                'suggestion': 'Set OLLAMA_URL to the reachable Ollama base URL.',
            }
        tags_url = f'{self.ollama_url}/api/tags'
        try:
            req = urllib_request.Request(tags_url, headers={'User-Agent': 'AI-Software-Factory'}, method='GET')
            with urllib_request.urlopen(req, timeout=5) as resp:
                raw_body = resp.read().decode('utf-8')
                payload = json.loads(raw_body) if raw_body else {}
        except urllib_error.HTTPError as exc:
            body = exc.read().decode('utf-8') if exc.fp else ''
            message = body or str(exc)
            return {
                'status': 'error',
                'message': f'Ollama returned HTTP {exc.code}: {message}',
                'ollama_url': self.ollama_url,
                'model': self.model,
                'checks': [
                    {
                        'name': 'api_tags',
                        'ok': False,
                        'status_code': exc.code,
                        'url': tags_url,
                        'message': message,
                    }
                ],
                'suggestion': 'Verify OLLAMA_URL points to the Ollama service and that the API is reachable.',
            }
        except Exception as exc:
            return {
                'status': 'error',
                'message': f'Unable to reach Ollama: {exc}',
                'ollama_url': self.ollama_url,
                'model': self.model,
                'checks': [
                    {
                        'name': 'api_tags',
                        'ok': False,
                        'status_code': None,
                        'url': tags_url,
                        'message': str(exc),
                    }
                ],
                'suggestion': 'Verify OLLAMA_URL resolves from the running factory process and that Ollama is listening on that address.',
            }
        models = payload.get('models') if isinstance(payload, dict) else []
        model_names: list[str] = []
        if isinstance(models, list):
            for model_entry in models:
                if not isinstance(model_entry, dict):
                    continue
                name = str(model_entry.get('name') or model_entry.get('model') or '').strip()
                if name:
                    model_names.append(name)
        requested = (self.model or '').strip()
        requested_base = requested.split(':', 1)[0]
        model_available = any(
            name == requested or name.startswith(f'{requested}:') or name.split(':', 1)[0] == requested_base
            for name in model_names
        )
        checks = [
            {
                'name': 'api_tags',
                'ok': True,
                'status_code': 200,
                'url': tags_url,
                'message': f'Loaded {len(model_names)} model entries.',
            },
            {
                'name': 'configured_model',
                'ok': model_available,
                'status_code': None,
                'url': None,
                'message': (
                    f'Configured model {requested} is available.'
                    if model_available else
                    f'Configured model {requested} was not found in Ollama tags.'
                ),
            },
        ]
        if model_available:
            return {
                'status': 'success',
                'message': f'Ollama is reachable and model {requested} is available.',
                'ollama_url': self.ollama_url,
                'model': requested,
                'model_available': True,
                'model_count': len(model_names),
                'models': model_names[:10],
                'checks': checks,
            }
        return {
            'status': 'error',
            'message': f'Ollama is reachable, but model {requested} is not available.',
            'ollama_url': self.ollama_url,
            'model': requested,
            'model_available': False,
            'model_count': len(model_names),
            'models': model_names[:10],
            'checks': checks,
            'suggestion': f'Pull or configure the model {requested}, or update OLLAMA_MODEL to a model that exists in Ollama.',
        }
--- a/ai_software_factory/agents/orchestrator.py
+++ b/ai_software_factory/agents/orchestrator.py
@@ -392,6 +392,9 @@ class AgentOrchestrator:
            f"existing_workspace={has_existing_files}",
        )
        if not content:
            detail = LLMServiceClient.extract_error_message(trace)
            if detail:
                raise RuntimeError(f'LLM code generation failed: {detail}')
            raise RuntimeError('LLM code generation did not return a usable response.')
        if not generated_files:
            raise RuntimeError('LLM code generation did not return any writable files.')
--- a/ai_software_factory/agents/request_interpreter.py
+++ b/ai_software_factory/agents/request_interpreter.py
@@ -15,6 +15,14 @@ except ImportError:
    from agents.llm_service import LLMServiceClient
 class RequestInterpretationError(RuntimeError):
    """Raised when one LLM-driven request interpretation stage fails."""
    def __init__(self, message: str, *, trace: dict | None = None):
        super().__init__(message)
        self.trace = trace or {}
 class RequestInterpreter:
    """Use Ollama to turn free-form text into a structured software request."""
@@ -89,25 +97,39 @@ class RequestInterpreter:
            expect_json=True,
        )
        if not content:
-            raise RuntimeError('LLM request interpretation did not return a usable response.')
+            detail = self.llm_client.extract_error_message(trace)
            if detail:
                raise RequestInterpretationError(f'LLM request interpretation failed: {detail}', trace=trace)
            raise RequestInterpretationError('LLM request interpretation did not return a usable response.', trace=trace)
        try:
            parsed = json.loads(content)
        except Exception as exc:
-            raise RuntimeError('LLM request interpretation did not return valid JSON.') from exc
+            raise RequestInterpretationError('LLM request interpretation did not return valid JSON.', trace=trace) from exc
-        interpreted = self._normalize_interpreted_request(parsed)
+        try:
-        routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context)
+            interpreted = self._normalize_interpreted_request(parsed)
            routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context)
        except RuntimeError as exc:
            raise RequestInterpretationError(str(exc), trace=trace) from exc
        if routing.get('intent') == 'continue_project' and routing.get('project_name'):
            interpreted['name'] = routing['project_name']
        naming_trace = None
        if routing.get('intent') == 'new_project':
-            interpreted, routing, naming_trace = await self._refine_new_project_identity(
+            try:
-                prompt_text=normalized,
+                interpreted, routing, naming_trace = await self._refine_new_project_identity(
-                interpreted=interpreted,
+                    prompt_text=normalized,
-                routing=routing,
+                    interpreted=interpreted,
-                context=compact_context,
+                    routing=routing,
-            )
+                    context=compact_context,
                )
            except RequestInterpretationError as exc:
                combined_trace = dict(trace)
                combined_trace['routing'] = routing
                combined_trace['context_excerpt'] = compact_context
                if exc.trace:
                    combined_trace['project_naming'] = exc.trace
                raise RequestInterpretationError(str(exc), trace=combined_trace) from exc
        trace['routing'] = routing
        trace['context_excerpt'] = compact_context
        if naming_trace is not None:
@@ -141,14 +163,20 @@ class RequestInterpreter:
            expect_json=True,
        )
        if not content:
-            raise RuntimeError('LLM project naming did not return a usable response.')
+            detail = self.llm_client.extract_error_message(trace)
            if detail:
                raise RequestInterpretationError(f'LLM project naming failed: {detail}', trace=trace)
            raise RequestInterpretationError('LLM project naming did not return a usable response.', trace=trace)
        try:
            parsed = json.loads(content)
        except Exception as exc:
-            raise RuntimeError('LLM project naming did not return valid JSON.') from exc
+            raise RequestInterpretationError('LLM project naming did not return valid JSON.', trace=trace) from exc
-        project_name, repo_name = self._normalize_project_identity(parsed)
+        try:
            project_name, repo_name = self._normalize_project_identity(parsed)
        except RuntimeError as exc:
            raise RequestInterpretationError(str(exc), trace=trace) from exc
        repo_name = self._ensure_unique_repo_name(repo_name, constraints['repo_names'])
        interpreted['name'] = project_name
        routing['project_name'] = project_name
@@ -362,8 +390,30 @@ class RequestInterpreter:
    def _normalize_project_identity(self, payload: dict) -> tuple[str, str]:
        """Validate model-proposed project and repository naming."""
-        project_candidate = str(payload.get('project_name') or payload.get('name') or '').strip()
+        project_payload = payload.get('project') if isinstance(payload.get('project'), dict) else {}
-        repo_candidate = str(payload.get('repo_name') or '').strip()
+        repository_payload = payload.get('repository') if isinstance(payload.get('repository'), dict) else {}
        project_candidate = str(
            payload.get('project_name')
            or payload.get('name')
            or payload.get('title')
            or payload.get('display_name')
            or project_payload.get('project_name')
            or project_payload.get('name')
            or project_payload.get('title')
            or project_payload.get('display_name')
            or ''
        ).strip()
        repo_candidate = str(
            payload.get('repo_name')
            or payload.get('repo')
            or payload.get('slug')
            or payload.get('repository_name')
            or payload.get('repository_slug')
            or repository_payload.get('repo_name')
            or repository_payload.get('name')
            or repository_payload.get('slug')
            or ''
        ).strip()
        if not project_candidate:
            raise RuntimeError('LLM project naming did not provide a project name.')
        if not repo_candidate:
--- a/ai_software_factory/config.py
+++ b/ai_software_factory/config.py
@@ -222,6 +222,7 @@ class Settings(BaseSettings):
    # Ollama settings computed from environment
    OLLAMA_URL: str = "http://ollama:11434"
    OLLAMA_MODEL: str = "llama3"
    LLM_REQUEST_TIMEOUT_SECONDS: int = 240
    LLM_GUARDRAIL_PROMPT: str = (
        "You are operating inside AI Software Factory. Follow the requested schema exactly, "
        "treat provided tool outputs as authoritative, and do not invent repositories, issues, pull requests, or delivery facts."
@@ -613,6 +614,11 @@ class Settings(BaseSettings):
        """Get the maximum number of queued prompts to process in one batch."""
        return max(int(_resolve_runtime_setting_value('PROMPT_QUEUE_MAX_BATCH_SIZE', self.PROMPT_QUEUE_MAX_BATCH_SIZE)), 1)
    @property
    def llm_request_timeout_seconds(self) -> int:
        """Get the outbound provider timeout for one LLM request."""
        return max(int(_resolve_runtime_setting_value('LLM_REQUEST_TIMEOUT_SECONDS', self.LLM_REQUEST_TIMEOUT_SECONDS)), 1)
    @property
    def projects_root(self) -> Path:
        """Get the root directory for generated project artifacts."""
--- a/ai_software_factory/dashboard_ui.py
+++ b/ai_software_factory/dashboard_ui.py
@@ -545,6 +545,52 @@ def _render_change_list(changes: list[dict]) -> None:
                    _render_side_by_side_diff(change['diff_text'])
 def _extract_llm_trace_error(trace: dict) -> str | None:
    """Extract one useful failure message from a persisted LLM trace."""
    if not isinstance(trace, dict):
        return None
    raw_response = trace.get('raw_response') if isinstance(trace.get('raw_response'), dict) else {}
    provider_trace = raw_response.get('provider_trace') if isinstance(raw_response.get('provider_trace'), dict) else {}
    provider_response = raw_response.get('provider_response') if isinstance(raw_response.get('provider_response'), dict) else {}
    nested_provider_response = provider_trace.get('provider_response') if isinstance(provider_trace.get('provider_response'), dict) else {}
    candidates = [
        raw_response.get('failure_message'),
        raw_response.get('error'),
        provider_response.get('error'),
        provider_trace.get('error'),
        nested_provider_response.get('error'),
    ]
    for candidate in candidates:
        if candidate:
            return str(candidate)
    return None
 def _build_llm_insights(traces: list[dict]) -> dict:
    """Summarize recent LLM activity for dashboard visibility."""
    normalized = [trace for trace in traces if isinstance(trace, dict)]
    by_stage: dict[str, int] = {}
    error_by_stage: dict[str, int] = {}
    recent_errors: list[dict] = []
    fallback_traces = 0
    for trace in normalized:
        stage = str(trace.get('stage') or 'unknown')
        by_stage[stage] = by_stage.get(stage, 0) + 1
        if trace.get('fallback_used'):
            fallback_traces += 1
        if _extract_llm_trace_error(trace):
            error_by_stage[stage] = error_by_stage.get(stage, 0) + 1
            recent_errors.append(trace)
    return {
        'total_traces': len(normalized),
        'error_traces': len(recent_errors),
        'fallback_traces': fallback_traces,
        'by_stage': by_stage,
        'error_by_stage': error_by_stage,
        'recent_errors': recent_errors[:8],
    }
 def _render_llm_traces(traces: list[dict]) -> None:
    """Render persisted LLM request/response traces for a prompt."""
    if not traces:
@@ -560,12 +606,17 @@ def _render_llm_traces(traces: list[dict]) -> None:
                ui.label(f'{provider}:{model}').classes('factory-chip')
            if trace.get('fallback_used'):
                ui.label('Fallback path used').classes('factory-chip')
            error_message = _extract_llm_trace_error(trace)
            if error_message:
                ui.label(error_message).classes('factory-chip')
            with ui.expansion('System prompt').classes('w-full q-mt-sm'):
                ui.label(trace.get('system_prompt') or 'No system prompt recorded').classes('factory-code')
            with ui.expansion('User prompt').classes('w-full q-mt-sm'):
                ui.label(trace.get('user_prompt') or 'No user prompt recorded').classes('factory-code')
            with ui.expansion('Assistant response').classes('w-full q-mt-sm'):
                ui.label(trace.get('assistant_response') or 'No assistant response recorded').classes('factory-code')
            with ui.expansion('Raw provider response').classes('w-full q-mt-sm'):
                ui.label(json.dumps(trace.get('raw_response'), indent=2, sort_keys=True) if trace.get('raw_response') is not None else 'No raw response recorded').classes('factory-code')
 def _filter_llm_traces(traces: list[dict], stage: str, model: str, search_query: str) -> list[dict]:
@@ -725,6 +776,20 @@ def _load_home_assistant_health_snapshot() -> dict:
        }
 def _load_ollama_health_snapshot() -> dict:
    """Load an Ollama health snapshot for UI rendering."""
    try:
        return LLMServiceClient().health_check_sync()
    except Exception as exc:
        return {
            'status': 'error',
            'message': f'Unable to run Ollama health checks: {exc}',
            'ollama_url': settings.ollama_url or 'Not configured',
            'model': settings.OLLAMA_MODEL,
            'checks': [],
        }
 def _add_dashboard_styles() -> None:
    """Register shared dashboard styles."""
    ui.add_head_html(
@@ -821,6 +886,7 @@ def _render_confirmation_dialog(title: str, message: str, confirm_label: str, on
 def _render_health_panels() -> None:
    """Render application, integration, and queue health panels."""
    runtime = get_database_runtime_summary()
    ollama_health = _load_ollama_health_snapshot()
    n8n_health = _load_n8n_health_snapshot()
    gitea_health = _load_gitea_health_snapshot()
    home_assistant_health = _load_home_assistant_health_snapshot()
@@ -843,6 +909,33 @@ def _render_health_panels() -> None:
                    ui.label(label).classes('factory-muted')
                    ui.label(str(value)).style('font-weight: 600; color: #3a281a;')
        with ui.card().classes('factory-panel q-pa-lg'):
            ui.label('Ollama / LLM').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
            ui.label(ollama_health.get('status', 'unknown').upper()).classes('factory-chip')
            ui.label(ollama_health.get('message', 'No Ollama status available.')).classes('factory-muted q-mt-sm')
            rows = [
                ('Ollama URL', ollama_health.get('ollama_url') or 'Not configured'),
                ('Configured Model', ollama_health.get('model') or 'Not configured'),
                ('Model Available', 'yes' if ollama_health.get('model_available') else 'no'),
                ('Visible Models', ollama_health.get('model_count') if ollama_health.get('model_count') is not None else 'unknown'),
            ]
            for label, value in rows:
                with ui.row().classes('justify-between w-full q-mt-sm'):
                    ui.label(label).classes('factory-muted')
                    ui.label(str(value)).style('font-weight: 600; color: #3a281a;')
            if ollama_health.get('models'):
                ui.label('Reported Models').style('font-size: 1rem; font-weight: 700; color: #3a281a; margin-top: 12px;')
                ui.label(', '.join(str(model) for model in ollama_health.get('models', []))).classes('factory-muted')
            if ollama_health.get('suggestion'):
                ui.label(ollama_health['suggestion']).classes('factory-chip q-mt-md')
            for check in ollama_health.get('checks', []):
                status = 'OK' if check.get('ok') else 'FAIL'
                ui.markdown(
                    f"- **{escape(check.get('name', 'check'))}** · {status} · {escape(str(check.get('status_code') or 'n/a'))} · {escape(check.get('url') or 'unknown url')}"
                )
                if check.get('message'):
                    ui.label(check['message']).classes('factory-muted')
        with ui.card().classes('factory-panel q-pa-lg'):
            ui.label('n8n Connection Status').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
            status_label = n8n_health.get('status', 'unknown').upper()
@@ -930,7 +1023,7 @@ def create_health_page() -> None:
            with ui.row().classes('items-center justify-between w-full'):
                with ui.column().classes('gap-1'):
                    ui.label('Factory Health').style('font-size: 2rem; font-weight: 800; color: #302116;')
-                    ui.label('Current application and n8n connectivity diagnostics.').classes('factory-muted')
+                    ui.label('Current application, Ollama, and integration connectivity diagnostics.').classes('factory-muted')
                with ui.row().classes('items-center gap-2'):
                    ui.link('Back to Dashboard', '/')
                    ui.link('Refresh Health', '/health-ui')
@@ -1431,6 +1524,7 @@ def create_dashboard():
            }
        projects = snapshot['projects']
        all_llm_traces = [trace for project_bundle in projects for trace in project_bundle.get('llm_traces', [])]
        recent_llm_traces = snapshot.get('recent_llm_traces', [])
        return {
            'snapshot': snapshot,
            'summary': snapshot['summary'],
@@ -1448,6 +1542,8 @@ def create_dashboard():
            'discovered_repositories': discovered_repositories,
            'prompt_settings': prompt_settings,
            'runtime_settings': runtime_settings,
            'recent_llm_traces': recent_llm_traces,
            'llm_insights': _build_llm_insights(recent_llm_traces),
            'llm_stage_options': [''] + sorted({trace.get('stage') for trace in all_llm_traces if trace.get('stage')}),
            'llm_model_options': [''] + sorted({trace.get('model') for trace in all_llm_traces if trace.get('model')}),
            'project_repository_map': {
@@ -1912,6 +2008,7 @@ def create_dashboard():
            return
        system_logs = view_model['system_logs']
        llm_runtime = view_model['llm_runtime']
        llm_insights = view_model.get('llm_insights', {})
        discovered_repositories = view_model['discovered_repositories']
        prompt_settings = view_model.get('prompt_settings', [])
        runtime_settings = view_model.get('runtime_settings', [])
@@ -1929,6 +2026,7 @@ def create_dashboard():
                    ('Provider', llm_runtime.get('provider')),
                    ('Model', llm_runtime.get('model')),
                    ('Ollama URL', llm_runtime.get('ollama_url')),
                    ('Request Timeout', str(llm_runtime.get('request_timeout_seconds') or 'n/a')),
                    ('Tool Context Limit', str(llm_runtime.get('tool_context_limit'))),
                    ('Max Tool Call Rounds', str(llm_runtime.get('max_tool_call_rounds'))),
                    ('Live Gitea Tools Configured', 'yes' if llm_runtime.get('gitea_live_tools_configured') else 'no'),
@@ -1965,6 +2063,25 @@ def create_dashboard():
                    for label, text in system_prompts.items():
                        ui.label(label.replace('_', ' ').title()).classes('factory-muted q-mt-sm')
                        ui.label(text or 'Not configured').classes('factory-code')
            with ui.card().classes('factory-panel q-pa-lg'):
                ui.label('LLM Insights').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
                for label, value in [
                    ('Recent Traces', llm_insights.get('total_traces', 0)),
                    ('Recent Errors', llm_insights.get('error_traces', 0)),
                    ('Fallback Traces', llm_insights.get('fallback_traces', 0)),
                ]:
                    with ui.row().classes('justify-between w-full q-mt-sm'):
                        ui.label(label).classes('factory-muted')
                        ui.label(str(value)).style('font-weight: 600; color: #3a281a;')
                if llm_insights.get('by_stage'):
                    ui.label('Trace Volume By Stage').style('font-weight: 700; color: #3a281a; margin-top: 12px;')
                    for stage_name, count in sorted(llm_insights.get('by_stage', {}).items()):
                        error_count = llm_insights.get('error_by_stage', {}).get(stage_name, 0)
                        ui.markdown(f"- **{stage_name}**: {count} trace(s), {error_count} error(s)")
                if llm_insights.get('recent_errors'):
                    ui.label('Recent LLM Errors').style('font-weight: 700; color: #3a281a; margin-top: 12px;')
                    for trace in llm_insights.get('recent_errors', []):
                        ui.markdown(f"- **{trace.get('stage') or 'llm'}** · {trace.get('timestamp') or 'n/a'} · {escape(_extract_llm_trace_error(trace) or 'Unknown error')}")
            with ui.card().classes('factory-panel q-pa-lg'):
                ui.label('Home Assistant and Queue Settings').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
                ui.label('Keep only the Home Assistant base URL and access token in the environment. Entity ids, thresholds, and queue behavior are edited here and persisted in the database.').classes('factory-muted')
--- a/ai_software_factory/main.py
+++ b/ai_software_factory/main.py
@@ -31,11 +31,12 @@ try:
    from .agents.change_summary import ChangeSummaryGenerator
    from .agents.database_manager import DatabaseManager
    from .agents.home_assistant import HomeAssistantAgent
-    from .agents.request_interpreter import RequestInterpreter
+    from .agents.request_interpreter import RequestInterpreter, RequestInterpretationError
    from .agents.llm_service import LLMServiceClient
    from .agents.orchestrator import AgentOrchestrator
    from .agents.n8n_setup import N8NSetupAgent
    from .agents.prompt_workflow import PromptWorkflowManager
    from .agents.telegram import TelegramHandler
    from .agents.ui_manager import UIManager
    from .models import ProjectHistory, ProjectLog, SystemLog
 except ImportError:
@@ -44,11 +45,12 @@ except ImportError:
    from agents.change_summary import ChangeSummaryGenerator
    from agents.database_manager import DatabaseManager
    from agents.home_assistant import HomeAssistantAgent
-    from agents.request_interpreter import RequestInterpreter
+    from agents.request_interpreter import RequestInterpreter, RequestInterpretationError
    from agents.llm_service import LLMServiceClient
    from agents.orchestrator import AgentOrchestrator
    from agents.n8n_setup import N8NSetupAgent
    from agents.prompt_workflow import PromptWorkflowManager
    from agents.telegram import TelegramHandler
    from agents.ui_manager import UIManager
    from models import ProjectHistory, ProjectLog, SystemLog
@@ -78,6 +80,7 @@ app = FastAPI(lifespan=lifespan)
 DbSession = Annotated[Session, Depends(database_module.get_db)]
 PROJECT_ID_PATTERN = re.compile(r"[^a-z0-9]+")
 UNASSIGNED_LLM_TRACE_PROJECT_ID = '__unassigned__'
 class SoftwareRequest(BaseModel):
@@ -202,6 +205,9 @@ async def _derive_project_id_for_request(
        expect_json=True,
    )
    if not content:
        detail = LLMServiceClient.extract_error_message(trace)
        if detail:
            raise RuntimeError(f'LLM project id naming failed: {detail}')
        raise RuntimeError('LLM project id naming did not return a usable response.')
    try:
        parsed = json.loads(content)
@@ -253,6 +259,63 @@ def _ensure_summary_mentions_pull_request(summary_message: str, pull_request: di
    return f"{summary_message}{separator} Review PR: {pr_url}"
 def _should_queue_telegram_request(request: FreeformSoftwareRequest) -> bool:
    """Return whether a Telegram request should be accepted for background processing."""
    return (
        request.source == 'telegram'
        and bool(request.chat_id)
        and bool(database_module.settings.telegram_bot_token)
        and not request.process_now
    )
 def _schedule_prompt_queue_processing() -> None:
    """Kick off background queue processing without blocking the current HTTP request."""
    if database_module.settings.prompt_queue_enabled and not database_module.settings.prompt_queue_auto_process:
        return
    limit = database_module.settings.prompt_queue_max_batch_size if database_module.settings.prompt_queue_enabled else 1
    force = database_module.settings.prompt_queue_force_process if database_module.settings.prompt_queue_enabled else True
    task = asyncio.create_task(_process_prompt_queue_batch(limit=limit, force=force))
    def _log_task_result(completed_task: asyncio.Task) -> None:
        try:
            completed_task.result()
        except Exception as exc:
            db = database_module.get_db_sync()
            try:
                DatabaseManager(db).log_system_event('prompt-queue', 'ERROR', f'Background queue processing failed: {exc}')
            finally:
                db.close()
    task.add_done_callback(_log_task_result)
 async def _notify_telegram_queue_result(request: FreeformSoftwareRequest, *, response: dict | None = None, error_message: str | None = None) -> None:
    """Send the final queued result back to Telegram when chat metadata is available."""
    if request.source != 'telegram' or not request.chat_id or not database_module.settings.telegram_bot_token:
        return
    if response is not None:
        message = (
            response.get('summary_message')
            or (response.get('data') or {}).get('summary_message')
            or response.get('message')
            or 'Software generation completed.'
        )
    else:
        message = f"Software generation failed: {error_message or 'Unknown error'}"
    result = await TelegramHandler(webhook_url=database_module.settings.backend_public_url).send_message(
        bot_token=database_module.settings.telegram_bot_token,
        chat_id=request.chat_id,
        text=message,
    )
    if result.get('status') == 'error':
        db = database_module.get_db_sync()
        try:
            DatabaseManager(db).log_system_event('telegram', 'ERROR', f"Unable to send queued Telegram update: {result.get('message')}")
        finally:
            db.close()
 def _serialize_system_log(log: SystemLog) -> dict:
    """Serialize a system log row."""
    return {
@@ -283,6 +346,51 @@ def _compose_prompt_text(request: SoftwareRequest) -> str:
    )
 def _generation_error_payload(
    *,
    message: str,
    request: SoftwareRequest | None = None,
    source: dict | None = None,
    interpreted_request: dict | None = None,
    routing: dict | None = None,
 ) -> dict:
    """Return a workflow-safe JSON payload for expected generation failures."""
    response = {
        'status': 'error',
        'message': message,
        'error': message,
        'summary_message': message,
        'summary_metadata': {
            'provider': None,
            'model': None,
            'fallback_used': False,
        },
        'data': {
            'history_id': None,
            'project_id': None,
            'name': request.name if request is not None else (interpreted_request or {}).get('name'),
            'description': request.description if request is not None else (interpreted_request or {}).get('description'),
            'status': 'error',
            'progress': 0,
            'message': message,
            'current_step': None,
            'error_message': message,
            'logs': [],
            'changed_files': [],
            'repository': None,
            'pull_request': None,
            'summary_message': message,
        },
    }
    if source is not None:
        response['source'] = source
    if interpreted_request is not None:
        response['interpreted_request'] = interpreted_request
    if routing is not None:
        response['routing'] = routing
    return response
 async def _run_generation(
    request: SoftwareRequest,
    db: Session,
@@ -520,6 +628,11 @@ def _get_home_assistant_health() -> dict:
    return _create_home_assistant_agent().health_check_sync()
 def _get_ollama_health() -> dict:
    """Return current Ollama connectivity diagnostics."""
    return LLMServiceClient().health_check_sync()
 async def _get_queue_gate_status(force: bool = False) -> dict:
    """Return whether queued prompts may be processed now."""
    if not database_module.settings.prompt_queue_enabled:
@@ -561,6 +674,48 @@ async def _interpret_freeform_request(request: FreeformSoftwareRequest, manager:
    return SoftwareRequest(**interpreted), routing, interpretation_trace
 def _persist_llm_trace(manager: DatabaseManager, *, project_id: str, trace: dict, prompt_id: int | None = None, history_id: int | None = None) -> None:
    """Persist one LLM trace payload when enough metadata is available."""
    if not isinstance(trace, dict) or not trace.get('stage'):
        return
    manager.log_llm_trace(
        project_id=project_id,
        history_id=history_id,
        prompt_id=prompt_id,
        stage=trace['stage'],
        provider=trace.get('provider') or 'unknown',
        model=trace.get('model') or 'unknown',
        system_prompt=trace.get('system_prompt') or '',
        user_prompt=trace.get('user_prompt') or '',
        assistant_response=trace.get('assistant_response') or '',
        raw_response=trace.get('raw_response'),
        fallback_used=trace.get('fallback_used', False),
    )
 def _persist_failed_freeform_llm_traces(manager: DatabaseManager, *, request: FreeformSoftwareRequest, error: RequestInterpretationError) -> None:
    """Persist failed interpretation traces under an unassigned bucket for dashboard inspection."""
    trace = error.trace if isinstance(error.trace, dict) else {}
    if not trace:
        return
    base_trace = dict(trace)
    base_trace['raw_response'] = {
        'failure_message': str(error),
        'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type},
        'provider_trace': trace.get('raw_response'),
    }
    _persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=base_trace)
    naming_trace = trace.get('project_naming') if isinstance(trace.get('project_naming'), dict) else None
    if naming_trace:
        enriched_naming_trace = dict(naming_trace)
        enriched_naming_trace['raw_response'] = {
            'failure_message': str(error),
            'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type},
            'provider_trace': naming_trace.get('raw_response'),
        }
        _persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=enriched_naming_trace)
 async def _run_freeform_generation(
    request: FreeformSoftwareRequest,
    db: Session,
@@ -591,33 +746,21 @@ async def _run_freeform_generation(
            manager = DatabaseManager(db)
            prompts = manager.get_prompt_events(project_id=project_data.get('project_id'))
            prompt_id = prompts[0]['id'] if prompts else None
-            manager.log_llm_trace(
+            _persist_llm_trace(
                manager,
                project_id=project_data.get('project_id'),
                history_id=project_data.get('history_id'),
                prompt_id=prompt_id,
-                stage=interpretation_trace['stage'],
+                trace=interpretation_trace,
                provider=interpretation_trace['provider'],
                model=interpretation_trace['model'],
                system_prompt=interpretation_trace['system_prompt'],
                user_prompt=interpretation_trace['user_prompt'],
                assistant_response=interpretation_trace['assistant_response'],
                raw_response=interpretation_trace.get('raw_response'),
                fallback_used=interpretation_trace.get('fallback_used', False),
            )
            naming_trace = interpretation_trace.get('project_naming')
            if naming_trace:
-                manager.log_llm_trace(
+                _persist_llm_trace(
                    manager,
                    project_id=project_data.get('project_id'),
                    history_id=project_data.get('history_id'),
                    prompt_id=prompt_id,
-                    stage=naming_trace['stage'],
+                    trace=naming_trace,
                    provider=naming_trace['provider'],
                    model=naming_trace['model'],
                    system_prompt=naming_trace['system_prompt'],
                    user_prompt=naming_trace['user_prompt'],
                    assistant_response=naming_trace['assistant_response'],
                    raw_response=naming_trace.get('raw_response'),
                    fallback_used=naming_trace.get('fallback_used', False),
                )
        response['interpreted_request'] = structured_request.model_dump()
        response['routing'] = routing
@@ -637,6 +780,11 @@ async def _run_freeform_generation(
                },
            )
        return response
    except RequestInterpretationError as exc:
        _persist_failed_freeform_llm_traces(manager, request=request, error=exc)
        if queue_item_id is not None:
            DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc))
        raise
    except Exception as exc:
        if queue_item_id is not None:
            DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc))
@@ -679,6 +827,7 @@ async def _process_prompt_queue_batch(limit: int = 1, force: bool = False) -> di
                process_now=True,
            )
            response = await _run_freeform_generation(request, work_db, queue_item_id=claimed['id'])
            await _notify_telegram_queue_result(request, response=response)
            processed.append(
                {
                    'queue_item_id': claimed['id'],
@@ -688,6 +837,7 @@ async def _process_prompt_queue_batch(limit: int = 1, force: bool = False) -> di
            )
        except Exception as exc:
            DatabaseManager(work_db).fail_queued_prompt(claimed['id'], str(exc))
            await _notify_telegram_queue_result(request, error_message=str(exc))
            processed.append({'queue_item_id': claimed['id'], 'status': 'failed', 'error': str(exc)})
        finally:
            work_db.close()
@@ -803,6 +953,7 @@ def health_check():
        'database_target': runtime['target'],
        'database_name': runtime['database'],
        'integrations': {
            'ollama': _get_ollama_health(),
            'gitea': _get_gitea_health(),
            'home_assistant': _get_home_assistant_health(),
        },
@@ -876,7 +1027,15 @@ def reset_runtime_setting(setting_key: str, db: DbSession):
@app.post('/generate')
 async def generate_software(request: SoftwareRequest, db: DbSession):
    """Create and record a software-generation request."""
-    return await _run_generation(request, db)
+    try:
        return await _run_generation(request, db)
    except Exception as exc:
        DatabaseManager(db).log_system_event(
            component='api',
            level='ERROR',
            message=f"Structured generation failed: {exc}",
        )
        return _generation_error_payload(message=str(exc), request=request)
@app.post('/generate/text')
@@ -898,7 +1057,7 @@ async def generate_software_from_text(request: FreeformSoftwareRequest, db: DbSe
            },
        }
-    if request.source == 'telegram' and database_module.settings.prompt_queue_enabled and not request.process_now:
+    if _should_queue_telegram_request(request):
        manager = DatabaseManager(db)
        queue_item = manager.enqueue_prompt(
            prompt_text=request.prompt_text,
@@ -907,12 +1066,19 @@ async def generate_software_from_text(request: FreeformSoftwareRequest, db: DbSe
            chat_type=request.chat_type,
            source_context={'chat_id': request.chat_id, 'chat_type': request.chat_type},
        )
        queue_gate = await _get_queue_gate_status(force=False)
        if not database_module.settings.prompt_queue_enabled or database_module.settings.prompt_queue_auto_process:
            _schedule_prompt_queue_processing()
        return {
            'status': 'queued',
-            'message': 'Prompt queued for energy-aware processing.',
+            'message': (
                'Prompt accepted for background processing.'
                if not database_module.settings.prompt_queue_enabled else
                'Prompt queued for background processing.'
            ),
            'queue_item': queue_item,
            'queue_summary': manager.get_prompt_queue_summary(),
-            'queue_gate': await _get_queue_gate_status(force=False),
+            'queue_gate': queue_gate,
            'source': {
                'type': request.source,
                'chat_id': request.chat_id,
@@ -920,7 +1086,22 @@ async def generate_software_from_text(request: FreeformSoftwareRequest, db: DbSe
            },
        }
-    return await _run_freeform_generation(request, db)
+    try:
        return await _run_freeform_generation(request, db)
    except Exception as exc:
        DatabaseManager(db).log_system_event(
            component='api',
            level='ERROR',
            message=f"Free-form generation failed for source={request.source}: {exc}",
        )
        return _generation_error_payload(
            message=str(exc),
            source={
                'type': request.source,
                'chat_id': request.chat_id,
                'chat_type': request.chat_type,
            },
        )
@app.get('/queue')
Author	SHA1	Message	Date
Simon Diesenreiter	80c11511d2	release: version 0.9.16 🚀 All checks were successful Upload Python Package / Create Release (push) Successful in 13s Details Upload Python Package / deploy (push) Successful in 36s Details	2026-04-11 22:38:41 +02:00
Simon Diesenreiter	0614f7573f	fix: better LLM failure tracing, refs NOISSUE	2026-04-11 22:38:38 +02:00
Simon Diesenreiter	2eba98dff4	release: version 0.9.15 🚀 All checks were successful Upload Python Package / Create Release (push) Successful in 15s Details Upload Python Package / deploy (push) Successful in 33s Details	2026-04-11 22:20:14 +02:00
Simon Diesenreiter	c437ae0173	fix: increase LLM timeouts, refs NOISSUE	2026-04-11 22:19:42 +02:00
Simon Diesenreiter	0770b254b1	release: version 0.9.14 🚀 All checks were successful Upload Python Package / Create Release (push) Successful in 19s Details Upload Python Package / deploy (push) Successful in 58s Details	2026-04-11 21:40:53 +02:00
Simon Diesenreiter	e651e3324d	fix: add Ollama connection health details in UI, refs NOISSUE	2026-04-11 21:40:50 +02:00
Simon Diesenreiter	bbe0279af4	release: version 0.9.13 🚀 All checks were successful Upload Python Package / Create Release (push) Successful in 16s Details Upload Python Package / deploy (push) Successful in 29s Details	2026-04-11 21:17:16 +02:00
Simon Diesenreiter	5e5e7b4f35	fix: fix internal server error, refs NOISSUE	2026-04-11 21:17:12 +02:00