From 1944e2a9cfe3415291b46ba960626a59f2445d25 Mon Sep 17 00:00:00 2001
From: Simon Diesenreiter <simon@disi.dev>
Date: Sat, 11 Apr 2026 14:33:45 +0200
Subject: [PATCH] fix: more file generation improvements, refs NOISSUE

---
 ai_software_factory/agents/orchestrator.py | 147 +++++++++++++++++----
 1 file changed, 123 insertions(+), 24 deletions(-)

diff --git a/ai_software_factory/agents/orchestrator.py b/ai_software_factory/agents/orchestrator.py
index 0278950..7eca8a2 100644
--- a/ai_software_factory/agents/orchestrator.py
+++ b/ai_software_factory/agents/orchestrator.py
@@ -7,6 +7,7 @@ import json
 import py_compile
 import re
 import subprocess
+from pathlib import PurePosixPath
 from typing import Optional
 from datetime import datetime
 
@@ -31,6 +32,10 @@ class AgentOrchestrator:
 
     REMOTE_READY_REPOSITORY_MODES = {'project', 'onboarded'}
     REMOTE_READY_REPOSITORY_STATUSES = {'created', 'exists', 'ready', 'onboarded'}
+    GENERATED_TEXT_FILE_SUFFIXES = {'.py', '.md', '.txt', '.toml', '.yaml', '.yml', '.json', '.ini', '.cfg', '.sh', '.html', '.css', '.js', '.ts'}
+    GENERATED_TEXT_FILE_NAMES = {'README', 'README.md', '.gitignore', 'requirements.txt', 'pyproject.toml', 'Dockerfile', 'Containerfile', 'Makefile'}
+    MAX_WORKSPACE_CONTEXT_FILES = 20
+    MAX_WORKSPACE_CONTEXT_CHARS = 24000
 
     def __init__(
         self,
@@ -240,6 +245,59 @@ class AgentOrchestrator:
             fallback_used=False,
         )
 
+    def _is_safe_relative_path(self, path: str) -> bool:
+        """Return whether a generated file path is safe to write under the project root.
+
+        Rejects empty/'.' paths, absolute paths, any '..' segment (leading, inner or trailing) and the '.git' tree.
+        """
+        # Treat backslashes as separators so Windows-style traversal ('..\x') is caught as well.
+        candidate = PurePosixPath((path or '').strip().replace('\\', '/'))
+        if not candidate.parts or candidate.is_absolute() or '..' in candidate.parts:
+            return False
+        return candidate.parts[0].lower() != '.git'
+
+    def _is_supported_generated_text_file(self, path: str) -> bool:
+        """Tell whether ``path`` names a text artifact the generator handles (known file name or suffix)."""
+        candidate = PurePosixPath(path)
+        known_name = candidate.name in self.GENERATED_TEXT_FILE_NAMES
+        known_suffix = candidate.suffix.lower() in self.GENERATED_TEXT_FILE_SUFFIXES
+        return known_name or known_suffix
+
+    def _collect_workspace_context(self) -> dict:
+        """Collect a compact, text-only snapshot of the current project workspace.
+
+        Returns ``{'has_existing_files': bool, 'files': [...]}`` where each file entry carries ``path``
+        (POSIX, relative to the project root), ``content`` and ``truncated``. Files are visited in sorted
+        path order and the snapshot is capped by MAX_WORKSPACE_CONTEXT_FILES and MAX_WORKSPACE_CONTEXT_CHARS.
+        """
+        if not self.project_root.exists():
+            return {'has_existing_files': False, 'files': []}
+
+        files: list[dict] = []
+        remaining_chars = self.MAX_WORKSPACE_CONTEXT_CHARS
+        for path in sorted(self.project_root.rglob('*')):
+            # Stop before touching the disk again once either budget is spent.
+            if remaining_chars <= 0 or len(files) >= self.MAX_WORKSPACE_CONTEXT_FILES:
+                break
+            if not path.is_file():
+                continue
+            relative_path = path.relative_to(self.project_root).as_posix()
+            if relative_path == '.gitignore':
+                continue
+            if not self._is_safe_relative_path(relative_path) or not self._is_supported_generated_text_file(relative_path):
+                continue
+            try:
+                # Read one character past the budget: enough to detect truncation without loading huge files.
+                with path.open(encoding='utf-8') as handle:
+                    content = handle.read(remaining_chars + 1)
+            except (UnicodeDecodeError, OSError):
+                # Unreadable or non-UTF-8 files are left out of the snapshot.
+                continue
+            snippet = content[:remaining_chars]
+            files.append({'path': relative_path, 'content': snippet, 'truncated': len(snippet) < len(content)})
+            remaining_chars -= len(snippet)
+        return {'has_existing_files': bool(files), 'files': files}
+
     def _parse_generated_files(self, content: str | None) -> dict[str, str]:
         """Parse an LLM file bundle response into relative-path/content pairs."""
         if not content:
@@ -248,7 +306,6 @@ class AgentOrchestrator:
             parsed = json.loads(content)
         except Exception:
             return {}
-        allowed_paths = set(self._fallback_generated_files().keys())
         generated: dict[str, str] = {}
         if isinstance(parsed, dict) and isinstance(parsed.get('files'), list):
             for item in parsed['files']:
@@ -256,34 +313,68 @@ class AgentOrchestrator:
                     continue
                 path = str(item.get('path') or '').strip()
                 file_content = item.get('content')
-                if path in allowed_paths and isinstance(file_content, str) and file_content.strip():
+                if (
+                    self._is_safe_relative_path(path)
+                    and self._is_supported_generated_text_file(path)
+                    and isinstance(file_content, str)
+                    and file_content.strip()
+                ):
                     generated[path] = file_content.rstrip() + "\n"
         elif isinstance(parsed, dict):
             for path, file_content in parsed.items():
-                if path in allowed_paths and isinstance(file_content, str) and file_content.strip():
-                    generated[str(path)] = file_content.rstrip() + "\n"
+                normalized_path = str(path).strip()
+                if (
+                    self._is_safe_relative_path(normalized_path)
+                    and self._is_supported_generated_text_file(normalized_path)
+                    and isinstance(file_content, str)
+                    and file_content.strip()
+                ):
+                    generated[normalized_path] = file_content.rstrip() + "\n"
         return generated
 
-    async def _generate_prompt_driven_files(self) -> tuple[dict[str, str], dict | None]:
+    async def _generate_prompt_driven_files(self) -> tuple[dict[str, str], dict | None, bool]:
         """Use the configured LLM to generate prompt-specific project files."""
         fallback_files = self._fallback_generated_files()
-        system_prompt = (
-            'You generate small but concrete starter projects. '
-            'Return only JSON. Provide production-like but compact code that directly reflects the user request. '
-            'Include the files README.md, requirements.txt, main.py, and tests/test_app.py. '
-            'Use FastAPI for Python web requests unless the prompt clearly demands something else. '
-            'The test must verify a real behavior from main.py. '
-            'Do not wrap the JSON in markdown fences.'
-        )
-        user_prompt = (
-            f"Project name: {self.project_name}\n"
-            f"Description: {self.description}\n"
-            f"Original prompt: {self.prompt_text or self.description}\n"
-            f"Requested features: {json.dumps(self.features)}\n"
-            f"Preferred tech stack: {json.dumps(self.tech_stack)}\n"
-            f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n"
-            "Return JSON shaped as {\"files\": [{\"path\": \"README.md\", \"content\": \"...\"}, ...]}."
-        )
+        workspace_context = self._collect_workspace_context()
+        has_existing_files = bool(workspace_context.get('has_existing_files'))
+        if has_existing_files:
+            system_prompt = (
+                'You modify an existing software repository. '
+                'Return only JSON. Update the smallest necessary set of files to satisfy the new prompt. '
+                'Prefer editing existing files over inventing a new starter app. '
+                'Only return files that should be written. Omit unchanged files. '
+                'Use repository-relative paths and do not wrap the JSON in markdown fences.'
+            )
+            user_prompt = (
+                f"Project name: {self.project_name}\n"
+                f"Description: {self.description}\n"
+                f"Original prompt: {self.prompt_text or self.description}\n"
+                f"Requested features: {json.dumps(self.features)}\n"
+                f"Preferred tech stack: {json.dumps(self.tech_stack)}\n"
+                f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n"
+                f"Current workspace snapshot:\n{json.dumps(workspace_context['files'], indent=2)}\n\n"
+                'Return JSON shaped as {"files": [{"path": "relative/path.py", "content": "..."}, ...]}. '
+                'Each file path must be relative to the repository root.'
+            )
+        else:
+            system_prompt = (
+                'You generate small but concrete starter projects. '
+                'Return only JSON. Provide production-like but compact code that directly reflects the user request. '
+                'Include the files README.md, requirements.txt, main.py, and tests/test_app.py. '
+                'Use FastAPI for Python web requests unless the prompt clearly demands something else. '
+                'The test must verify a real behavior from main.py. '
+                'Do not wrap the JSON in markdown fences.'
+            )
+            user_prompt = (
+                f"Project name: {self.project_name}\n"
+                f"Description: {self.description}\n"
+                f"Original prompt: {self.prompt_text or self.description}\n"
+                f"Requested features: {json.dumps(self.features)}\n"
+                f"Preferred tech stack: {json.dumps(self.tech_stack)}\n"
+                f"Related issue: {json.dumps(self.related_issue) if self.related_issue else 'null'}\n\n"
+                'Return JSON shaped as {"files": [{"path": "README.md", "content": "..."}, ...]}. '
+                'At minimum include README.md, requirements.txt, main.py, and tests/test_app.py.'
+            )
         content, trace = await LLMServiceClient().chat_with_trace(
             stage='generation_plan',
             system_prompt=system_prompt,
@@ -293,12 +384,15 @@ class AgentOrchestrator:
                 'project_name': self.project_name,
                 'repository': self.ui_manager.ui_data.get('repository'),
                 'related_issue': self.related_issue,
+                'workspace_files': workspace_context.get('files', []),
             },
             expect_json=True,
         )
         generated_files = self._parse_generated_files(content)
+        if has_existing_files:
+            return generated_files, trace, True
         merged_files = {**fallback_files, **generated_files}
-        return merged_files, trace
+        return merged_files, trace, False
 
     async def _sync_issue_context(self) -> None:
         """Sync repository issues and resolve a linked issue from the prompt when present."""
@@ -571,6 +665,8 @@ class AgentOrchestrator:
         target.parent.mkdir(parents=True, exist_ok=True)
         change_type = "UPDATE" if target.exists() else "CREATE"
         previous_content = target.read_text(encoding="utf-8") if target.exists() else ""
+        if previous_content == content:
+            return
         diff_text = self._build_diff_text(relative_path, previous_content, content)
         target.write_text(content, encoding="utf-8")
         self.changed_files.append(relative_path)
@@ -679,9 +775,12 @@ class AgentOrchestrator:
 
     async def _generate_code(self) -> None:
         """Generate code using Ollama."""
-        generated_files, trace = await self._generate_prompt_driven_files()
+        change_count_before = len(self.pending_code_changes)
+        generated_files, trace, editing_existing_workspace = await self._generate_prompt_driven_files()
         for relative_path, content in generated_files.items():
             self._write_file(relative_path, content)
+        if editing_existing_workspace and len(self.pending_code_changes) == change_count_before:
+            raise RuntimeError('The LLM response did not produce any file changes for the existing project.')
         fallback_used = bool(trace and trace.get('fallback_used')) or trace is None
         if self.db_manager and self.history and self.prompt_audit and trace:
             self.db_manager.log_llm_trace(