From 1944e2a9cfe3415291b46ba960626a59f2445d25 Mon Sep 17 00:00:00 2001 From: Simon Diesenreiter Date: Sat, 11 Apr 2026 14:33:45 +0200 Subject: [PATCH] fix: more file generation improvements, refs NOISSUE --- ai_software_factory/agents/orchestrator.py | 147 +++++++++++++++++---- 1 file changed, 123 insertions(+), 24 deletions(-) diff --git a/ai_software_factory/agents/orchestrator.py b/ai_software_factory/agents/orchestrator.py index 0278950..7eca8a2 100644 --- a/ai_software_factory/agents/orchestrator.py +++ b/ai_software_factory/agents/orchestrator.py @@ -7,6 +7,7 @@ import json import py_compile import re import subprocess +from pathlib import PurePosixPath from typing import Optional from datetime import datetime @@ -31,6 +32,10 @@ class AgentOrchestrator: REMOTE_READY_REPOSITORY_MODES = {'project', 'onboarded'} REMOTE_READY_REPOSITORY_STATUSES = {'created', 'exists', 'ready', 'onboarded'} + GENERATED_TEXT_FILE_SUFFIXES = {'.py', '.md', '.txt', '.toml', '.yaml', '.yml', '.json', '.ini', '.cfg', '.sh', '.html', '.css', '.js', '.ts'} + GENERATED_TEXT_FILE_NAMES = {'README', 'README.md', '.gitignore', 'requirements.txt', 'pyproject.toml', 'Dockerfile', 'Containerfile', 'Makefile'} + MAX_WORKSPACE_CONTEXT_FILES = 20 + MAX_WORKSPACE_CONTEXT_CHARS = 24000 def __init__( self, @@ -240,6 +245,59 @@ class AgentOrchestrator: fallback_used=False, ) + def _is_safe_relative_path(self, path: str) -> bool: + """Return whether a generated file path is safe to write under the project root.""" + normalized = str(PurePosixPath((path or '').strip())) + if not normalized or normalized in {'.', '..'}: + return False + if normalized.startswith('/') or normalized.startswith('../') or '/../' in normalized: + return False + if normalized.startswith('.git/'): + return False + return True + + def _is_supported_generated_text_file(self, path: str) -> bool: + """Return whether the generated path is a supported text artifact.""" + normalized = PurePosixPath(path) + if normalized.name in self.GENERATED_TEXT_FILE_NAMES: + return True + return normalized.suffix.lower() in self.GENERATED_TEXT_FILE_SUFFIXES + + def _collect_workspace_context(self) -> dict: + """Collect a compact, text-only snapshot of the current project workspace.""" + if not self.project_root.exists(): + return {'has_existing_files': False, 'files': []} + + files: list[dict] = [] + total_chars = 0 + for path in sorted(self.project_root.rglob('*')): + if not path.is_file(): + continue + relative_path = path.relative_to(self.project_root).as_posix() + if relative_path == '.gitignore': + continue + if not self._is_safe_relative_path(relative_path) or not self._is_supported_generated_text_file(relative_path): + continue + try: + content = path.read_text(encoding='utf-8') + except (UnicodeDecodeError, OSError): + continue + remaining_chars = self.MAX_WORKSPACE_CONTEXT_CHARS - total_chars + if remaining_chars <= 0: + break + snippet = content[:remaining_chars] + files.append( + { + 'path': relative_path, + 'content': snippet, + 'truncated': len(snippet) < len(content), + } + ) + total_chars += len(snippet) + if len(files) >= self.MAX_WORKSPACE_CONTEXT_FILES: + break + return {'has_existing_files': bool(files), 'files': files} + def _parse_generated_files(self, content: str | None) -> dict[str, str]: """Parse an LLM file bundle response into relative-path/content pairs.""" if not content: @@ -248,7 +306,6 @@ class AgentOrchestrator: parsed = json.loads(content) except Exception: return {} - allowed_paths = set(self._fallback_generated_files().keys()) generated: dict[str, str] = {} if isinstance(parsed, dict) and isinstance(parsed.get('files'), list): for item in parsed['files']: @@ -256,34 +313,68 @@ class AgentOrchestrator: continue path = str(item.get('path') or '').strip() file_content = item.get('content') - if path in allowed_paths and isinstance(file_content, str) and file_content.strip(): + if ( + self._is_safe_relative_path(path) + and self._is_supported_generated_text_file(path) + and isinstance(file_content, str) + and file_content.strip() + ): generated[path] = file_content.rstrip() + "\n" elif isinstance(parsed, dict): for path, file_content in parsed.items(): - if path in allowed_paths and isinstance(file_content, str) and file_content.strip(): - generated[str(path)] = file_content.rstrip() + "\n" + normalized_path = str(path).strip() + if ( + self._is_safe_relative_path(normalized_path) + and self._is_supported_generated_text_file(normalized_path) + and isinstance(file_content, str) + and file_content.strip() + ): + generated[normalized_path] = file_content.rstrip() + "\n" return generated - async def _generate_prompt_driven_files(self) -> tuple[dict[str, str], dict | None]: + async def _generate_prompt_driven_files(self) -> tuple[dict[str, str], dict | None, bool]: """Use the configured LLM to generate prompt-specific project files.""" fallback_files = self._fallback_generated_files() - system_prompt = ( - 'You generate small but concrete starter projects. ' - 'Return only JSON. Provide production-like but compact code that directly reflects the user request. ' - 'Include the files README.md, requirements.txt, main.py, and tests/test_app.py. ' - 'Use FastAPI for Python web requests unless the prompt clearly demands something else. ' - 'The test must verify a real behavior from main.py. ' - 'Do not wrap the JSON in markdown fences.' - ) - user_prompt = ( - f"Project name: {self.project_name}\n" - f"Description: {self.description}\n" - f"Original prompt: {self.prompt_text or self.description}\n" - f"Requested features: {json.dumps(self.features)}\n" - f"Preferred tech stack: {json.dumps(self.tech_stack)}\n" - f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n" - "Return JSON shaped as {\"files\": [{\"path\": \"README.md\", \"content\": \"...\"}, ...]}." - ) + workspace_context = self._collect_workspace_context() + has_existing_files = bool(workspace_context.get('has_existing_files')) + if has_existing_files: + system_prompt = ( + 'You modify an existing software repository. ' + 'Return only JSON. Update the smallest necessary set of files to satisfy the new prompt. ' + 'Prefer editing existing files over inventing a new starter app. ' + 'Only return files that should be written. Omit unchanged files. ' + 'Use repository-relative paths and do not wrap the JSON in markdown fences.' + ) + user_prompt = ( + f"Project name: {self.project_name}\n" + f"Description: {self.description}\n" + f"Original prompt: {self.prompt_text or self.description}\n" + f"Requested features: {json.dumps(self.features)}\n" + f"Preferred tech stack: {json.dumps(self.tech_stack)}\n" + f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n" + f"Current workspace snapshot:\n{json.dumps(workspace_context['files'], indent=2)}\n\n" + 'Return JSON shaped as {"files": [{"path": "relative/path.py", "content": "..."}, ...]}. ' + 'Each file path must be relative to the repository root.' + ) + else: + system_prompt = ( + 'You generate small but concrete starter projects. ' + 'Return only JSON. Provide production-like but compact code that directly reflects the user request. ' + 'Include the files README.md, requirements.txt, main.py, and tests/test_app.py. ' + 'Use FastAPI for Python web requests unless the prompt clearly demands something else. ' + 'The test must verify a real behavior from main.py. ' + 'Do not wrap the JSON in markdown fences.' + ) + user_prompt = ( + f"Project name: {self.project_name}\n" + f"Description: {self.description}\n" + f"Original prompt: {self.prompt_text or self.description}\n" + f"Requested features: {json.dumps(self.features)}\n" + f"Preferred tech stack: {json.dumps(self.tech_stack)}\n" + f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n" + 'Return JSON shaped as {"files": [{"path": "README.md", "content": "..."}, ...]}. ' + 'At minimum include README.md, requirements.txt, main.py, and tests/test_app.py.' + ) content, trace = await LLMServiceClient().chat_with_trace( stage='generation_plan', system_prompt=system_prompt, @@ -293,12 +384,15 @@ class AgentOrchestrator: 'project_name': self.project_name, 'repository': self.ui_manager.ui_data.get('repository'), 'related_issue': self.related_issue, + 'workspace_files': workspace_context.get('files', []), }, expect_json=True, ) generated_files = self._parse_generated_files(content) + if has_existing_files: + return generated_files, trace, True merged_files = {**fallback_files, **generated_files} - return merged_files, trace + return merged_files, trace, False async def _sync_issue_context(self) -> None: """Sync repository issues and resolve a linked issue from the prompt when present.""" @@ -571,6 +665,8 @@ class AgentOrchestrator: target.parent.mkdir(parents=True, exist_ok=True) change_type = "UPDATE" if target.exists() else "CREATE" previous_content = target.read_text(encoding="utf-8") if target.exists() else "" + if previous_content == content: + return diff_text = self._build_diff_text(relative_path, previous_content, content) target.write_text(content, encoding="utf-8") self.changed_files.append(relative_path) @@ -679,9 +775,12 @@ class AgentOrchestrator: async def _generate_code(self) -> None: """Generate code using Ollama.""" - generated_files, trace = await self._generate_prompt_driven_files() + change_count_before = len(self.pending_code_changes) + generated_files, trace, editing_existing_workspace = await self._generate_prompt_driven_files() for relative_path, content in generated_files.items(): self._write_file(relative_path, content) + if editing_existing_workspace and len(self.pending_code_changes) == change_count_before: + raise RuntimeError('The LLM response did not produce any file changes for the existing project.') fallback_used = bool(trace and trace.get('fallback_used')) or trace is None if self.db_manager and self.history and self.prompt_audit and trace: self.db_manager.log_llm_trace(