4 Commits

Author SHA1 Message Date
80c11511d2 release: version 0.9.16 🚀
All checks were successful
Upload Python Package / Create Release (push) Successful in 13s
Upload Python Package / deploy (push) Successful in 36s
2026-04-11 22:38:41 +02:00
0614f7573f fix: better LLM failure tracing, refs NOISSUE 2026-04-11 22:38:38 +02:00
2eba98dff4 release: version 0.9.15 🚀
All checks were successful
Upload Python Package / Create Release (push) Successful in 15s
Upload Python Package / deploy (push) Successful in 33s
2026-04-11 22:20:14 +02:00
c437ae0173 fix: increase LLM timeouts, refs NOISSUE 2026-04-11 22:19:42 +02:00
8 changed files with 300 additions and 42 deletions

View File

@@ -5,11 +5,33 @@ Changelog
(unreleased)
------------
Fix
~~~
- Better LLM failure tracing, refs NOISSUE. [Simon Diesenreiter]
0.9.15 (2026-04-11)
-------------------
Fix
~~~
- Increase LLM timeouts, refs NOISSUE. [Simon Diesenreiter]
Other
~~~~~
0.9.14 (2026-04-11)
-------------------
Fix
~~~
- Add Ollama connection health details in UI, refs NOISSUE. [Simon
  Diesenreiter]
Other
~~~~~
0.9.13 (2026-04-11)
-------------------

View File

@@ -1 +1 @@
0.9.14 0.9.16

View File

@@ -2503,6 +2503,7 @@ class DatabaseManager:
archived_project_bundles = [self.get_project_audit_data(project.project_id) for project in archived_projects[:limit]] archived_project_bundles = [self.get_project_audit_data(project.project_id) for project in archived_projects[:limit]]
all_project_bundles = [self.get_project_audit_data(project.project_id) for project in active_projects] all_project_bundles = [self.get_project_audit_data(project.project_id) for project in active_projects]
all_project_bundles.extend(self.get_project_audit_data(project.project_id) for project in archived_projects) all_project_bundles.extend(self.get_project_audit_data(project.project_id) for project in archived_projects)
recent_llm_traces = self.get_llm_traces(limit=limit * 20)
system_logs = self.db.query(SystemLog).order_by(SystemLog.created_at.desc()).limit(limit).all() system_logs = self.db.query(SystemLog).order_by(SystemLog.created_at.desc()).limit(limit).all()
return { return {
"summary": { "summary": {
@@ -2532,6 +2533,7 @@ class DatabaseManager:
} }
for log in system_logs for log in system_logs
], ],
"recent_llm_traces": recent_llm_traces,
"lineage_links": self.get_prompt_change_links(limit=limit * 10), "lineage_links": self.get_prompt_change_links(limit=limit * 10),
"correlations": self.get_prompt_change_correlations(limit=limit), "correlations": self.get_prompt_change_correlations(limit=limit),
"prompt_queue": { "prompt_queue": {

View File

@@ -185,6 +185,7 @@ class LLMServiceClient:
def __init__(self, ollama_url: str | None = None, model: str | None = None): def __init__(self, ollama_url: str | None = None, model: str | None = None):
self.ollama_url = (ollama_url or settings.ollama_url).rstrip('/') self.ollama_url = (ollama_url or settings.ollama_url).rstrip('/')
self.model = model or settings.OLLAMA_MODEL self.model = model or settings.OLLAMA_MODEL
self.request_timeout_seconds = settings.llm_request_timeout_seconds
self.toolbox = LLMToolbox() self.toolbox = LLMToolbox()
self.live_tool_executor = LLMLiveToolExecutor() self.live_tool_executor = LLMLiveToolExecutor()
@@ -290,13 +291,16 @@ class LLMServiceClient:
try: try:
import aiohttp import aiohttp
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.request_timeout_seconds)) as session:
async with session.post(f'{self.ollama_url}/api/chat', json=request_payload) as resp: async with session.post(f'{self.ollama_url}/api/chat', json=request_payload) as resp:
payload = await resp.json() payload = await resp.json()
if 200 <= resp.status < 300: if 200 <= resp.status < 300:
return (payload.get('message') or {}).get('content', ''), payload, None return (payload.get('message') or {}).get('content', ''), payload, None
return None, payload, str(payload.get('error') or payload) return None, payload, str(payload.get('error') or payload)
except Exception as exc: except Exception as exc:
if exc.__class__.__name__ == 'TimeoutError':
message = f'LLM request timed out after {self.request_timeout_seconds} seconds'
return None, {'error': message}, message
return None, {'error': str(exc)}, str(exc) return None, {'error': str(exc)}, str(exc)
@staticmethod @staticmethod
@@ -395,6 +399,7 @@ class LLMServiceClient:
'provider': 'ollama', 'provider': 'ollama',
'ollama_url': self.ollama_url, 'ollama_url': self.ollama_url,
'model': self.model, 'model': self.model,
'request_timeout_seconds': self.request_timeout_seconds,
'guardrails': { 'guardrails': {
'global': settings.llm_guardrail_prompt, 'global': settings.llm_guardrail_prompt,
'request_interpretation': settings.llm_request_interpreter_guardrail_prompt, 'request_interpretation': settings.llm_request_interpreter_guardrail_prompt,

View File

@@ -15,6 +15,14 @@ except ImportError:
from agents.llm_service import LLMServiceClient from agents.llm_service import LLMServiceClient
class RequestInterpretationError(RuntimeError):
"""Raised when one LLM-driven request interpretation stage fails."""
def __init__(self, message: str, *, trace: dict | None = None):
super().__init__(message)
self.trace = trace or {}
class RequestInterpreter: class RequestInterpreter:
"""Use Ollama to turn free-form text into a structured software request.""" """Use Ollama to turn free-form text into a structured software request."""
@@ -91,26 +99,37 @@ class RequestInterpreter:
if not content: if not content:
detail = self.llm_client.extract_error_message(trace) detail = self.llm_client.extract_error_message(trace)
if detail: if detail:
raise RuntimeError(f'LLM request interpretation failed: {detail}') raise RequestInterpretationError(f'LLM request interpretation failed: {detail}', trace=trace)
raise RuntimeError('LLM request interpretation did not return a usable response.') raise RequestInterpretationError('LLM request interpretation did not return a usable response.', trace=trace)
try: try:
parsed = json.loads(content) parsed = json.loads(content)
except Exception as exc: except Exception as exc:
raise RuntimeError('LLM request interpretation did not return valid JSON.') from exc raise RequestInterpretationError('LLM request interpretation did not return valid JSON.', trace=trace) from exc
try:
interpreted = self._normalize_interpreted_request(parsed) interpreted = self._normalize_interpreted_request(parsed)
routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context) routing = self._normalize_routing(parsed.get('routing'), interpreted, compact_context)
except RuntimeError as exc:
raise RequestInterpretationError(str(exc), trace=trace) from exc
if routing.get('intent') == 'continue_project' and routing.get('project_name'): if routing.get('intent') == 'continue_project' and routing.get('project_name'):
interpreted['name'] = routing['project_name'] interpreted['name'] = routing['project_name']
naming_trace = None naming_trace = None
if routing.get('intent') == 'new_project': if routing.get('intent') == 'new_project':
try:
interpreted, routing, naming_trace = await self._refine_new_project_identity( interpreted, routing, naming_trace = await self._refine_new_project_identity(
prompt_text=normalized, prompt_text=normalized,
interpreted=interpreted, interpreted=interpreted,
routing=routing, routing=routing,
context=compact_context, context=compact_context,
) )
except RequestInterpretationError as exc:
combined_trace = dict(trace)
combined_trace['routing'] = routing
combined_trace['context_excerpt'] = compact_context
if exc.trace:
combined_trace['project_naming'] = exc.trace
raise RequestInterpretationError(str(exc), trace=combined_trace) from exc
trace['routing'] = routing trace['routing'] = routing
trace['context_excerpt'] = compact_context trace['context_excerpt'] = compact_context
if naming_trace is not None: if naming_trace is not None:
@@ -146,15 +165,18 @@ class RequestInterpreter:
if not content: if not content:
detail = self.llm_client.extract_error_message(trace) detail = self.llm_client.extract_error_message(trace)
if detail: if detail:
raise RuntimeError(f'LLM project naming failed: {detail}') raise RequestInterpretationError(f'LLM project naming failed: {detail}', trace=trace)
raise RuntimeError('LLM project naming did not return a usable response.') raise RequestInterpretationError('LLM project naming did not return a usable response.', trace=trace)
try: try:
parsed = json.loads(content) parsed = json.loads(content)
except Exception as exc: except Exception as exc:
raise RuntimeError('LLM project naming did not return valid JSON.') from exc raise RequestInterpretationError('LLM project naming did not return valid JSON.', trace=trace) from exc
try:
project_name, repo_name = self._normalize_project_identity(parsed) project_name, repo_name = self._normalize_project_identity(parsed)
except RuntimeError as exc:
raise RequestInterpretationError(str(exc), trace=trace) from exc
repo_name = self._ensure_unique_repo_name(repo_name, constraints['repo_names']) repo_name = self._ensure_unique_repo_name(repo_name, constraints['repo_names'])
interpreted['name'] = project_name interpreted['name'] = project_name
routing['project_name'] = project_name routing['project_name'] = project_name
@@ -368,8 +390,30 @@ class RequestInterpreter:
def _normalize_project_identity(self, payload: dict) -> tuple[str, str]: def _normalize_project_identity(self, payload: dict) -> tuple[str, str]:
"""Validate model-proposed project and repository naming.""" """Validate model-proposed project and repository naming."""
project_candidate = str(payload.get('project_name') or payload.get('name') or '').strip() project_payload = payload.get('project') if isinstance(payload.get('project'), dict) else {}
repo_candidate = str(payload.get('repo_name') or '').strip() repository_payload = payload.get('repository') if isinstance(payload.get('repository'), dict) else {}
project_candidate = str(
payload.get('project_name')
or payload.get('name')
or payload.get('title')
or payload.get('display_name')
or project_payload.get('project_name')
or project_payload.get('name')
or project_payload.get('title')
or project_payload.get('display_name')
or ''
).strip()
repo_candidate = str(
payload.get('repo_name')
or payload.get('repo')
or payload.get('slug')
or payload.get('repository_name')
or payload.get('repository_slug')
or repository_payload.get('repo_name')
or repository_payload.get('name')
or repository_payload.get('slug')
or ''
).strip()
if not project_candidate: if not project_candidate:
raise RuntimeError('LLM project naming did not provide a project name.') raise RuntimeError('LLM project naming did not provide a project name.')
if not repo_candidate: if not repo_candidate:

View File

@@ -222,6 +222,7 @@ class Settings(BaseSettings):
# Ollama settings computed from environment # Ollama settings computed from environment
OLLAMA_URL: str = "http://ollama:11434" OLLAMA_URL: str = "http://ollama:11434"
OLLAMA_MODEL: str = "llama3" OLLAMA_MODEL: str = "llama3"
LLM_REQUEST_TIMEOUT_SECONDS: int = 240
LLM_GUARDRAIL_PROMPT: str = ( LLM_GUARDRAIL_PROMPT: str = (
"You are operating inside AI Software Factory. Follow the requested schema exactly, " "You are operating inside AI Software Factory. Follow the requested schema exactly, "
"treat provided tool outputs as authoritative, and do not invent repositories, issues, pull requests, or delivery facts." "treat provided tool outputs as authoritative, and do not invent repositories, issues, pull requests, or delivery facts."
@@ -613,6 +614,11 @@ class Settings(BaseSettings):
"""Get the maximum number of queued prompts to process in one batch.""" """Get the maximum number of queued prompts to process in one batch."""
return max(int(_resolve_runtime_setting_value('PROMPT_QUEUE_MAX_BATCH_SIZE', self.PROMPT_QUEUE_MAX_BATCH_SIZE)), 1) return max(int(_resolve_runtime_setting_value('PROMPT_QUEUE_MAX_BATCH_SIZE', self.PROMPT_QUEUE_MAX_BATCH_SIZE)), 1)
@property
def llm_request_timeout_seconds(self) -> int:
"""Get the outbound provider timeout for one LLM request."""
return max(int(_resolve_runtime_setting_value('LLM_REQUEST_TIMEOUT_SECONDS', self.LLM_REQUEST_TIMEOUT_SECONDS)), 1)
@property @property
def projects_root(self) -> Path: def projects_root(self) -> Path:
"""Get the root directory for generated project artifacts.""" """Get the root directory for generated project artifacts."""

View File

@@ -545,6 +545,52 @@ def _render_change_list(changes: list[dict]) -> None:
_render_side_by_side_diff(change['diff_text']) _render_side_by_side_diff(change['diff_text'])
def _extract_llm_trace_error(trace: dict) -> str | None:
"""Extract one useful failure message from a persisted LLM trace."""
if not isinstance(trace, dict):
return None
raw_response = trace.get('raw_response') if isinstance(trace.get('raw_response'), dict) else {}
provider_trace = raw_response.get('provider_trace') if isinstance(raw_response.get('provider_trace'), dict) else {}
provider_response = raw_response.get('provider_response') if isinstance(raw_response.get('provider_response'), dict) else {}
nested_provider_response = provider_trace.get('provider_response') if isinstance(provider_trace.get('provider_response'), dict) else {}
candidates = [
raw_response.get('failure_message'),
raw_response.get('error'),
provider_response.get('error'),
provider_trace.get('error'),
nested_provider_response.get('error'),
]
for candidate in candidates:
if candidate:
return str(candidate)
return None
def _build_llm_insights(traces: list[dict]) -> dict:
"""Summarize recent LLM activity for dashboard visibility."""
normalized = [trace for trace in traces if isinstance(trace, dict)]
by_stage: dict[str, int] = {}
error_by_stage: dict[str, int] = {}
recent_errors: list[dict] = []
fallback_traces = 0
for trace in normalized:
stage = str(trace.get('stage') or 'unknown')
by_stage[stage] = by_stage.get(stage, 0) + 1
if trace.get('fallback_used'):
fallback_traces += 1
if _extract_llm_trace_error(trace):
error_by_stage[stage] = error_by_stage.get(stage, 0) + 1
recent_errors.append(trace)
return {
'total_traces': len(normalized),
'error_traces': len(recent_errors),
'fallback_traces': fallback_traces,
'by_stage': by_stage,
'error_by_stage': error_by_stage,
'recent_errors': recent_errors[:8],
}
def _render_llm_traces(traces: list[dict]) -> None: def _render_llm_traces(traces: list[dict]) -> None:
"""Render persisted LLM request/response traces for a prompt.""" """Render persisted LLM request/response traces for a prompt."""
if not traces: if not traces:
@@ -560,12 +606,17 @@ def _render_llm_traces(traces: list[dict]) -> None:
ui.label(f'{provider}:{model}').classes('factory-chip') ui.label(f'{provider}:{model}').classes('factory-chip')
if trace.get('fallback_used'): if trace.get('fallback_used'):
ui.label('Fallback path used').classes('factory-chip') ui.label('Fallback path used').classes('factory-chip')
error_message = _extract_llm_trace_error(trace)
if error_message:
ui.label(error_message).classes('factory-chip')
with ui.expansion('System prompt').classes('w-full q-mt-sm'): with ui.expansion('System prompt').classes('w-full q-mt-sm'):
ui.label(trace.get('system_prompt') or 'No system prompt recorded').classes('factory-code') ui.label(trace.get('system_prompt') or 'No system prompt recorded').classes('factory-code')
with ui.expansion('User prompt').classes('w-full q-mt-sm'): with ui.expansion('User prompt').classes('w-full q-mt-sm'):
ui.label(trace.get('user_prompt') or 'No user prompt recorded').classes('factory-code') ui.label(trace.get('user_prompt') or 'No user prompt recorded').classes('factory-code')
with ui.expansion('Assistant response').classes('w-full q-mt-sm'): with ui.expansion('Assistant response').classes('w-full q-mt-sm'):
ui.label(trace.get('assistant_response') or 'No assistant response recorded').classes('factory-code') ui.label(trace.get('assistant_response') or 'No assistant response recorded').classes('factory-code')
with ui.expansion('Raw provider response').classes('w-full q-mt-sm'):
ui.label(json.dumps(trace.get('raw_response'), indent=2, sort_keys=True) if trace.get('raw_response') is not None else 'No raw response recorded').classes('factory-code')
def _filter_llm_traces(traces: list[dict], stage: str, model: str, search_query: str) -> list[dict]: def _filter_llm_traces(traces: list[dict], stage: str, model: str, search_query: str) -> list[dict]:
@@ -1473,6 +1524,7 @@ def create_dashboard():
} }
projects = snapshot['projects'] projects = snapshot['projects']
all_llm_traces = [trace for project_bundle in projects for trace in project_bundle.get('llm_traces', [])] all_llm_traces = [trace for project_bundle in projects for trace in project_bundle.get('llm_traces', [])]
recent_llm_traces = snapshot.get('recent_llm_traces', [])
return { return {
'snapshot': snapshot, 'snapshot': snapshot,
'summary': snapshot['summary'], 'summary': snapshot['summary'],
@@ -1490,6 +1542,8 @@ def create_dashboard():
'discovered_repositories': discovered_repositories, 'discovered_repositories': discovered_repositories,
'prompt_settings': prompt_settings, 'prompt_settings': prompt_settings,
'runtime_settings': runtime_settings, 'runtime_settings': runtime_settings,
'recent_llm_traces': recent_llm_traces,
'llm_insights': _build_llm_insights(recent_llm_traces),
'llm_stage_options': [''] + sorted({trace.get('stage') for trace in all_llm_traces if trace.get('stage')}), 'llm_stage_options': [''] + sorted({trace.get('stage') for trace in all_llm_traces if trace.get('stage')}),
'llm_model_options': [''] + sorted({trace.get('model') for trace in all_llm_traces if trace.get('model')}), 'llm_model_options': [''] + sorted({trace.get('model') for trace in all_llm_traces if trace.get('model')}),
'project_repository_map': { 'project_repository_map': {
@@ -1954,6 +2008,7 @@ def create_dashboard():
return return
system_logs = view_model['system_logs'] system_logs = view_model['system_logs']
llm_runtime = view_model['llm_runtime'] llm_runtime = view_model['llm_runtime']
llm_insights = view_model.get('llm_insights', {})
discovered_repositories = view_model['discovered_repositories'] discovered_repositories = view_model['discovered_repositories']
prompt_settings = view_model.get('prompt_settings', []) prompt_settings = view_model.get('prompt_settings', [])
runtime_settings = view_model.get('runtime_settings', []) runtime_settings = view_model.get('runtime_settings', [])
@@ -1971,6 +2026,7 @@ def create_dashboard():
('Provider', llm_runtime.get('provider')), ('Provider', llm_runtime.get('provider')),
('Model', llm_runtime.get('model')), ('Model', llm_runtime.get('model')),
('Ollama URL', llm_runtime.get('ollama_url')), ('Ollama URL', llm_runtime.get('ollama_url')),
('Request Timeout', str(llm_runtime.get('request_timeout_seconds') or 'n/a')),
('Tool Context Limit', str(llm_runtime.get('tool_context_limit'))), ('Tool Context Limit', str(llm_runtime.get('tool_context_limit'))),
('Max Tool Call Rounds', str(llm_runtime.get('max_tool_call_rounds'))), ('Max Tool Call Rounds', str(llm_runtime.get('max_tool_call_rounds'))),
('Live Gitea Tools Configured', 'yes' if llm_runtime.get('gitea_live_tools_configured') else 'no'), ('Live Gitea Tools Configured', 'yes' if llm_runtime.get('gitea_live_tools_configured') else 'no'),
@@ -2007,6 +2063,25 @@ def create_dashboard():
for label, text in system_prompts.items(): for label, text in system_prompts.items():
ui.label(label.replace('_', ' ').title()).classes('factory-muted q-mt-sm') ui.label(label.replace('_', ' ').title()).classes('factory-muted q-mt-sm')
ui.label(text or 'Not configured').classes('factory-code') ui.label(text or 'Not configured').classes('factory-code')
with ui.card().classes('factory-panel q-pa-lg'):
ui.label('LLM Insights').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
for label, value in [
('Recent Traces', llm_insights.get('total_traces', 0)),
('Recent Errors', llm_insights.get('error_traces', 0)),
('Fallback Traces', llm_insights.get('fallback_traces', 0)),
]:
with ui.row().classes('justify-between w-full q-mt-sm'):
ui.label(label).classes('factory-muted')
ui.label(str(value)).style('font-weight: 600; color: #3a281a;')
if llm_insights.get('by_stage'):
ui.label('Trace Volume By Stage').style('font-weight: 700; color: #3a281a; margin-top: 12px;')
for stage_name, count in sorted(llm_insights.get('by_stage', {}).items()):
error_count = llm_insights.get('error_by_stage', {}).get(stage_name, 0)
ui.markdown(f"- **{stage_name}**: {count} trace(s), {error_count} error(s)")
if llm_insights.get('recent_errors'):
ui.label('Recent LLM Errors').style('font-weight: 700; color: #3a281a; margin-top: 12px;')
for trace in llm_insights.get('recent_errors', []):
ui.markdown(f"- **{trace.get('stage') or 'llm'}** · {trace.get('timestamp') or 'n/a'} · {escape(_extract_llm_trace_error(trace) or 'Unknown error')}")
with ui.card().classes('factory-panel q-pa-lg'): with ui.card().classes('factory-panel q-pa-lg'):
ui.label('Home Assistant and Queue Settings').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;') ui.label('Home Assistant and Queue Settings').style('font-size: 1.25rem; font-weight: 700; color: #3a281a;')
ui.label('Keep only the Home Assistant base URL and access token in the environment. Entity ids, thresholds, and queue behavior are edited here and persisted in the database.').classes('factory-muted') ui.label('Keep only the Home Assistant base URL and access token in the environment. Entity ids, thresholds, and queue behavior are edited here and persisted in the database.').classes('factory-muted')

View File

@@ -31,11 +31,12 @@ try:
from .agents.change_summary import ChangeSummaryGenerator from .agents.change_summary import ChangeSummaryGenerator
from .agents.database_manager import DatabaseManager from .agents.database_manager import DatabaseManager
from .agents.home_assistant import HomeAssistantAgent from .agents.home_assistant import HomeAssistantAgent
from .agents.request_interpreter import RequestInterpreter from .agents.request_interpreter import RequestInterpreter, RequestInterpretationError
from .agents.llm_service import LLMServiceClient from .agents.llm_service import LLMServiceClient
from .agents.orchestrator import AgentOrchestrator from .agents.orchestrator import AgentOrchestrator
from .agents.n8n_setup import N8NSetupAgent from .agents.n8n_setup import N8NSetupAgent
from .agents.prompt_workflow import PromptWorkflowManager from .agents.prompt_workflow import PromptWorkflowManager
from .agents.telegram import TelegramHandler
from .agents.ui_manager import UIManager from .agents.ui_manager import UIManager
from .models import ProjectHistory, ProjectLog, SystemLog from .models import ProjectHistory, ProjectLog, SystemLog
except ImportError: except ImportError:
@@ -44,11 +45,12 @@ except ImportError:
from agents.change_summary import ChangeSummaryGenerator from agents.change_summary import ChangeSummaryGenerator
from agents.database_manager import DatabaseManager from agents.database_manager import DatabaseManager
from agents.home_assistant import HomeAssistantAgent from agents.home_assistant import HomeAssistantAgent
from agents.request_interpreter import RequestInterpreter from agents.request_interpreter import RequestInterpreter, RequestInterpretationError
from agents.llm_service import LLMServiceClient from agents.llm_service import LLMServiceClient
from agents.orchestrator import AgentOrchestrator from agents.orchestrator import AgentOrchestrator
from agents.n8n_setup import N8NSetupAgent from agents.n8n_setup import N8NSetupAgent
from agents.prompt_workflow import PromptWorkflowManager from agents.prompt_workflow import PromptWorkflowManager
from agents.telegram import TelegramHandler
from agents.ui_manager import UIManager from agents.ui_manager import UIManager
from models import ProjectHistory, ProjectLog, SystemLog from models import ProjectHistory, ProjectLog, SystemLog
@@ -78,6 +80,7 @@ app = FastAPI(lifespan=lifespan)
DbSession = Annotated[Session, Depends(database_module.get_db)] DbSession = Annotated[Session, Depends(database_module.get_db)]
PROJECT_ID_PATTERN = re.compile(r"[^a-z0-9]+") PROJECT_ID_PATTERN = re.compile(r"[^a-z0-9]+")
UNASSIGNED_LLM_TRACE_PROJECT_ID = '__unassigned__'
class SoftwareRequest(BaseModel): class SoftwareRequest(BaseModel):
@@ -256,6 +259,63 @@ def _ensure_summary_mentions_pull_request(summary_message: str, pull_request: di
return f"{summary_message}{separator} Review PR: {pr_url}" return f"{summary_message}{separator} Review PR: {pr_url}"
def _should_queue_telegram_request(request: FreeformSoftwareRequest) -> bool:
"""Return whether a Telegram request should be accepted for background processing."""
return (
request.source == 'telegram'
and bool(request.chat_id)
and bool(database_module.settings.telegram_bot_token)
and not request.process_now
)
def _schedule_prompt_queue_processing() -> None:
"""Kick off background queue processing without blocking the current HTTP request."""
if database_module.settings.prompt_queue_enabled and not database_module.settings.prompt_queue_auto_process:
return
limit = database_module.settings.prompt_queue_max_batch_size if database_module.settings.prompt_queue_enabled else 1
force = database_module.settings.prompt_queue_force_process if database_module.settings.prompt_queue_enabled else True
task = asyncio.create_task(_process_prompt_queue_batch(limit=limit, force=force))
def _log_task_result(completed_task: asyncio.Task) -> None:
try:
completed_task.result()
except Exception as exc:
db = database_module.get_db_sync()
try:
DatabaseManager(db).log_system_event('prompt-queue', 'ERROR', f'Background queue processing failed: {exc}')
finally:
db.close()
task.add_done_callback(_log_task_result)
async def _notify_telegram_queue_result(request: FreeformSoftwareRequest, *, response: dict | None = None, error_message: str | None = None) -> None:
"""Send the final queued result back to Telegram when chat metadata is available."""
if request.source != 'telegram' or not request.chat_id or not database_module.settings.telegram_bot_token:
return
if response is not None:
message = (
response.get('summary_message')
or (response.get('data') or {}).get('summary_message')
or response.get('message')
or 'Software generation completed.'
)
else:
message = f"Software generation failed: {error_message or 'Unknown error'}"
result = await TelegramHandler(webhook_url=database_module.settings.backend_public_url).send_message(
bot_token=database_module.settings.telegram_bot_token,
chat_id=request.chat_id,
text=message,
)
if result.get('status') == 'error':
db = database_module.get_db_sync()
try:
DatabaseManager(db).log_system_event('telegram', 'ERROR', f"Unable to send queued Telegram update: {result.get('message')}")
finally:
db.close()
def _serialize_system_log(log: SystemLog) -> dict: def _serialize_system_log(log: SystemLog) -> dict:
"""Serialize a system log row.""" """Serialize a system log row."""
return { return {
@@ -614,6 +674,48 @@ async def _interpret_freeform_request(request: FreeformSoftwareRequest, manager:
return SoftwareRequest(**interpreted), routing, interpretation_trace return SoftwareRequest(**interpreted), routing, interpretation_trace
def _persist_llm_trace(manager: DatabaseManager, *, project_id: str, trace: dict, prompt_id: int | None = None, history_id: int | None = None) -> None:
"""Persist one LLM trace payload when enough metadata is available."""
if not isinstance(trace, dict) or not trace.get('stage'):
return
manager.log_llm_trace(
project_id=project_id,
history_id=history_id,
prompt_id=prompt_id,
stage=trace['stage'],
provider=trace.get('provider') or 'unknown',
model=trace.get('model') or 'unknown',
system_prompt=trace.get('system_prompt') or '',
user_prompt=trace.get('user_prompt') or '',
assistant_response=trace.get('assistant_response') or '',
raw_response=trace.get('raw_response'),
fallback_used=trace.get('fallback_used', False),
)
def _persist_failed_freeform_llm_traces(manager: DatabaseManager, *, request: FreeformSoftwareRequest, error: RequestInterpretationError) -> None:
"""Persist failed interpretation traces under an unassigned bucket for dashboard inspection."""
trace = error.trace if isinstance(error.trace, dict) else {}
if not trace:
return
base_trace = dict(trace)
base_trace['raw_response'] = {
'failure_message': str(error),
'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type},
'provider_trace': trace.get('raw_response'),
}
_persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=base_trace)
naming_trace = trace.get('project_naming') if isinstance(trace.get('project_naming'), dict) else None
if naming_trace:
enriched_naming_trace = dict(naming_trace)
enriched_naming_trace['raw_response'] = {
'failure_message': str(error),
'source': {'type': request.source, 'chat_id': request.chat_id, 'chat_type': request.chat_type},
'provider_trace': naming_trace.get('raw_response'),
}
_persist_llm_trace(manager, project_id=UNASSIGNED_LLM_TRACE_PROJECT_ID, trace=enriched_naming_trace)
async def _run_freeform_generation( async def _run_freeform_generation(
request: FreeformSoftwareRequest, request: FreeformSoftwareRequest,
db: Session, db: Session,
@@ -644,33 +746,21 @@ async def _run_freeform_generation(
manager = DatabaseManager(db) manager = DatabaseManager(db)
prompts = manager.get_prompt_events(project_id=project_data.get('project_id')) prompts = manager.get_prompt_events(project_id=project_data.get('project_id'))
prompt_id = prompts[0]['id'] if prompts else None prompt_id = prompts[0]['id'] if prompts else None
manager.log_llm_trace( _persist_llm_trace(
manager,
project_id=project_data.get('project_id'), project_id=project_data.get('project_id'),
history_id=project_data.get('history_id'), history_id=project_data.get('history_id'),
prompt_id=prompt_id, prompt_id=prompt_id,
stage=interpretation_trace['stage'], trace=interpretation_trace,
provider=interpretation_trace['provider'],
model=interpretation_trace['model'],
system_prompt=interpretation_trace['system_prompt'],
user_prompt=interpretation_trace['user_prompt'],
assistant_response=interpretation_trace['assistant_response'],
raw_response=interpretation_trace.get('raw_response'),
fallback_used=interpretation_trace.get('fallback_used', False),
) )
naming_trace = interpretation_trace.get('project_naming') naming_trace = interpretation_trace.get('project_naming')
if naming_trace: if naming_trace:
manager.log_llm_trace( _persist_llm_trace(
manager,
project_id=project_data.get('project_id'), project_id=project_data.get('project_id'),
history_id=project_data.get('history_id'), history_id=project_data.get('history_id'),
prompt_id=prompt_id, prompt_id=prompt_id,
stage=naming_trace['stage'], trace=naming_trace,
provider=naming_trace['provider'],
model=naming_trace['model'],
system_prompt=naming_trace['system_prompt'],
user_prompt=naming_trace['user_prompt'],
assistant_response=naming_trace['assistant_response'],
raw_response=naming_trace.get('raw_response'),
fallback_used=naming_trace.get('fallback_used', False),
) )
response['interpreted_request'] = structured_request.model_dump() response['interpreted_request'] = structured_request.model_dump()
response['routing'] = routing response['routing'] = routing
@@ -690,6 +780,11 @@ async def _run_freeform_generation(
}, },
) )
return response return response
except RequestInterpretationError as exc:
_persist_failed_freeform_llm_traces(manager, request=request, error=exc)
if queue_item_id is not None:
DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc))
raise
except Exception as exc: except Exception as exc:
if queue_item_id is not None: if queue_item_id is not None:
DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc)) DatabaseManager(db).fail_queued_prompt(queue_item_id, str(exc))
@@ -732,6 +827,7 @@ async def _process_prompt_queue_batch(limit: int = 1, force: bool = False) -> di
process_now=True, process_now=True,
) )
response = await _run_freeform_generation(request, work_db, queue_item_id=claimed['id']) response = await _run_freeform_generation(request, work_db, queue_item_id=claimed['id'])
await _notify_telegram_queue_result(request, response=response)
processed.append( processed.append(
{ {
'queue_item_id': claimed['id'], 'queue_item_id': claimed['id'],
@@ -741,6 +837,7 @@ async def _process_prompt_queue_batch(limit: int = 1, force: bool = False) -> di
) )
except Exception as exc: except Exception as exc:
DatabaseManager(work_db).fail_queued_prompt(claimed['id'], str(exc)) DatabaseManager(work_db).fail_queued_prompt(claimed['id'], str(exc))
await _notify_telegram_queue_result(request, error_message=str(exc))
processed.append({'queue_item_id': claimed['id'], 'status': 'failed', 'error': str(exc)}) processed.append({'queue_item_id': claimed['id'], 'status': 'failed', 'error': str(exc)})
finally: finally:
work_db.close() work_db.close()
@@ -960,7 +1057,7 @@ async def generate_software_from_text(request: FreeformSoftwareRequest, db: DbSe
}, },
} }
if request.source == 'telegram' and database_module.settings.prompt_queue_enabled and not request.process_now: if _should_queue_telegram_request(request):
manager = DatabaseManager(db) manager = DatabaseManager(db)
queue_item = manager.enqueue_prompt( queue_item = manager.enqueue_prompt(
prompt_text=request.prompt_text, prompt_text=request.prompt_text,
@@ -969,12 +1066,19 @@ async def generate_software_from_text(request: FreeformSoftwareRequest, db: DbSe
chat_type=request.chat_type, chat_type=request.chat_type,
source_context={'chat_id': request.chat_id, 'chat_type': request.chat_type}, source_context={'chat_id': request.chat_id, 'chat_type': request.chat_type},
) )
queue_gate = await _get_queue_gate_status(force=False)
if not database_module.settings.prompt_queue_enabled or database_module.settings.prompt_queue_auto_process:
_schedule_prompt_queue_processing()
return { return {
'status': 'queued', 'status': 'queued',
'message': 'Prompt queued for energy-aware processing.', 'message': (
'Prompt accepted for background processing.'
if not database_module.settings.prompt_queue_enabled else
'Prompt queued for background processing.'
),
'queue_item': queue_item, 'queue_item': queue_item,
'queue_summary': manager.get_prompt_queue_summary(), 'queue_summary': manager.get_prompt_queue_summary(),
'queue_gate': await _get_queue_gate_status(force=False), 'queue_gate': queue_gate,
'source': { 'source': {
'type': request.source, 'type': request.source,
'chat_id': request.chat_id, 'chat_id': request.chat_id,