From 1c4a570fac73fa68f2f4528e9858da060f20ee0d Mon Sep 17 00:00:00 2001
From: Alexander Thiess
Date: Tue, 16 Sep 2025 05:46:05 +0200
Subject: [PATCH] init

---
 .python-version                |   1 +
 README.md                      | 168 ++++++++++++++
 example.py                     |  21 ++
 main.py                        |   6 +
 prompts/system_message.md      |   3 +
 prompts/welcome.md             |   7 +
 pyproject.toml                 |  29 +++
 src/llmutils/__init__.py       |   6 +
 src/llmutils/prompt_manager.py | 411 +++++++++++++++++++++++++++++
 uv.lock                        |   8 +
 10 files changed, 660 insertions(+)
 create mode 100644 .python-version
 create mode 100644 example.py
 create mode 100644 main.py
 create mode 100644 prompts/system_message.md
 create mode 100644 prompts/welcome.md
 create mode 100644 pyproject.toml
 create mode 100644 src/llmutils/__init__.py
 create mode 100644 src/llmutils/prompt_manager.py
 create mode 100644 uv.lock

diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..24ee5b1
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/README.md b/README.md
index b90e77c..f5d803e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,170 @@
 # LLMUtils
 
+A Python utility library for managing LLM prompts with template variables and JSON schemas.
+
+## Installation
+
+```bash
+# Install from GitHub
+uv add git+https://git.project-insanity.de/gmarth/LLMUtils.git
+
+# Or with pip
+pip install git+https://git.project-insanity.de/gmarth/LLMUtils.git
+```
+
+## Features
+
+- **Smart Prompt Management**: Load and manage prompt templates with variable substitution
+- **On-demand Loading**: Prompts are loaded lazily at runtime, so nothing is read from disk until it is needed
+- **Caching Support**: Optional caching to avoid repeated disk reads
+- **JSON Schema Support**: Associate structured output schemas with prompts
+- **Variable Validation**: Automatic validation of required template variables
+- **Flexible API**: Fill variables at retrieval time or on demand later
+
+## Quick Start
+
+### Basic Usage
+
+```python
+from llmutils.prompt_manager import PromptManager
+
+# Get a prompt template
+result = PromptManager.get_prompt('greeting')
+print(result.variables)  # See required variables: {'name', 'age'}
+print(result.template)   # View the template: "Hello {{name}}, you are {{age}} years old"
+
+# Fill the template
+filled = result.fill(name='Alice', age=30)
+print(filled)  # "Hello Alice, you are 30 years old"
+```
+
+### Pre-filling Variables
+
+```python
+# Fill variables during retrieval
+result = PromptManager.get_prompt('greeting', name='Alice', age=30)
+print(result.prompt)  # Already filled: "Hello Alice, you are 30 years old"
+```
+
+### Validation
+
+```python
+result = PromptManager.get_prompt('greeting')
+
+# Check whether the provided variables are sufficient
+if not result.validate(name='Alice'):
+    missing = result.get_missing_variables(name='Alice')
+    print(f"Missing variables: {missing}")  # {'age'}
+
+# Fill with all required variables
+filled = result.fill(name='Alice', age=30)
+```
+
+### JSON Schema Support
+
+```python
+# Get a prompt with its associated schema
+result = PromptManager.get_prompt('task_prompt')
+
+if result.schema:
+    print("This prompt has a structured output schema")
+    print(result.schema)  # The JSON schema dictionary
+```
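+
+The schema itself is a plain dictionary; wiring it into an LLM call is left to the caller. A hypothetical sketch (`call_llm` stands in for your own client code and is not part of this library):
+
+```python
+result = PromptManager.get_prompt('task_prompt')
+
+# Pass the filled prompt and, when present, the schema to your LLM client
+response = call_llm(prompt=result.prompt, response_schema=result.schema)
+```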
+
+## Configuration
+
+```python
+from pathlib import Path
+from llmutils.prompt_manager import PromptManager
+
+# Configure a custom prompts directory (default: ./prompts)
+PromptManager.configure(path=Path('/custom/prompts/location'))
+
+# Disable caching for development
+PromptManager.configure(caching=False)
+
+# Clear the cache to force a reload
+PromptManager.reload_prompts()
+```
+
+## Prompt Files
+
+Place your prompt templates in the `prompts/` directory:
+
+- `prompts/greeting.md` - Markdown file containing the template
+- `prompts/greeting.json` - Optional JSON schema for structured output
+
+Example prompt template (`greeting.md`):
+```markdown
+Hello {{name}},
+
+You are {{age}} years old.
+```
+
+Example schema (`greeting.json`):
+```json
+{
+  "type": "object",
+  "properties": {
+    "response": {
+      "type": "string"
+    }
+  }
+}
+```
+
+## API Reference
+
+### PromptResult Class
+
+The `PromptResult` dataclass returned by `get_prompt()`:
+
+- `template: str` - The original template string
+- `name: str` - The prompt name
+- `variables: Set[str]` - Required template variables
+- `schema: Optional[Dict]` - Associated JSON schema
+- `prompt: str` - Property that returns the filled prompt, or the template if not yet filled
+- `fill(**kwargs) -> str` - Fill the template with variables
+- `validate(**kwargs) -> bool` - Check whether all required variables are provided
+- `get_missing_variables(**kwargs) -> Set[str]` - Get the set of missing variables
+
+### PromptManager Methods
+
+- `get_prompt(prompt_name, **kwargs) -> PromptResult` - Get a prompt template
+- `get_schema(prompt_name) -> Optional[Dict]` - Get just the schema
+- `has_schema(prompt_name) -> bool` - Check if a prompt has a schema
+- `list_prompts() -> Dict` - List all available prompts
+- `get_prompt_info(prompt_name) -> Dict` - Get detailed prompt information
+- `configure(path=None, caching=None)` - Configure the prompts path and caching
+- `reload_prompts()` - Clear the cache
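+
+The manager-level helpers can be combined to discover and inspect prompts at runtime. A minimal sketch, assuming the `greeting` prompt files shown above are present in `prompts/`:
+
+```python
+from llmutils.prompt_manager import PromptManager
+
+# Enumerate every available prompt with its variables and schema status
+for name, info in PromptManager.list_prompts().items():
+    print(f"{name}: variables={info['variables']}, has_schema={info['has_schema']}")
+
+# Inspect a single prompt in detail
+info = PromptManager.get_prompt_info('greeting')
+print(info['template'], info['variable_count'])
+
+# Fetch the schema only when one exists
+if PromptManager.has_schema('greeting'):
+    schema = PromptManager.get_schema('greeting')
+```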
+
+## License
+
+MIT
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..8c34580
--- /dev/null
+++ b/example.py
@@ -0,0 +1,21 @@
+from llmutils import PromptManager
+from pprint import pprint
+
+
+def main():
+    print("Hello from llmutils!")
+    print("#"*5 + " Available Prompts " + "#"*5)
+    pprint(PromptManager().list_prompts())
+
+    system_prompt = PromptManager().get_prompt('system_message')
+    print("#"*5 + " System Message " + "#"*5)
+    print(system_prompt.prompt)
+
+    # Variables can be pre-filled at retrieval time
+    welcome_prompt = PromptManager().get_prompt('welcome', user='Alex', project_name='Something')
+    print("#"*5 + " Welcome Prompt " + "#"*5)
+    print(welcome_prompt.prompt)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..dbe0630
--- /dev/null
+++ b/main.py
@@ -0,0 +1,6 @@
+def main():
+    print("Hello from llmutils!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/prompts/system_message.md b/prompts/system_message.md
new file mode 100644
index 0000000..d9eb790
--- /dev/null
+++ b/prompts/system_message.md
@@ -0,0 +1,3 @@
+# System
+
+You are you and no one else.
\ No newline at end of file
diff --git a/prompts/welcome.md b/prompts/welcome.md
new file mode 100644
index 0000000..acc0f9b
--- /dev/null
+++ b/prompts/welcome.md
@@ -0,0 +1,7 @@
+# Welcome
+
+Hello {{user}},
+
+This is just an example and shows you everything you need to know to use it in your {{project_name}}.
+
+Have fun!
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..02a2288
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,29 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "llmutils"
+version = "0.1.0"
+description = "Utilities for working with LLMs"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = []
+authors = [
+    {name = "Alexander Thiess", email = "thiess.alexander@googlemail.com"}
+]
+license = {text = "MIT"}
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.13",
+]
+
+[project.urls]
+Homepage = "https://git.project-insanity.de/gmarth/LLMUtils"
+Repository = "https://git.project-insanity.de/gmarth/LLMUtils"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/llmutils"]
diff --git a/src/llmutils/__init__.py b/src/llmutils/__init__.py
new file mode 100644
index 0000000..c0ba3df
--- /dev/null
+++ b/src/llmutils/__init__.py
@@ -0,0 +1,6 @@
+"""LLMUtils - Utilities for working with LLMs"""
+
+from .prompt_manager import PromptManager
+
+__version__ = "0.1.0"
+__all__ = ["PromptManager"]
\ No newline at end of file
diff --git a/src/llmutils/prompt_manager.py b/src/llmutils/prompt_manager.py
new file mode 100644
index 0000000..d2fb410
--- /dev/null
+++ b/src/llmutils/prompt_manager.py
@@ -0,0 +1,411 @@
+import re
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Any, Set, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PromptResult:
+    """Smart result object that holds a template and can fill variables on demand."""
+    template: str
+    name: str
+    variables: Set[str]
+    schema: Optional[Dict[str, Any]] = None
+    _filled_prompt: Optional[str] = None
+    _context: Optional[Dict[str, Any]] = None
+
+    def validate(self, **kwargs) -> bool:
+        """Validate that all required variables are provided.
+
+        Returns:
+            True if all required variables are present, False otherwise
+        """
+        provided_vars = set(kwargs.keys())
+        missing_vars = self.variables - provided_vars
+        return len(missing_vars) == 0
+
+    def get_missing_variables(self, **kwargs) -> Set[str]:
+        """Get the set of missing required variables.
+
+        Returns:
+            Set of variable names that are required but not provided
+        """
+        provided_vars = set(kwargs.keys())
+        return self.variables - provided_vars
+
+    def fill(self, **kwargs) -> str:
+        """Fill the template with the provided variables.
+
+        Args:
+            **kwargs: Variables to fill in the template
+
+        Returns:
+            The filled prompt string
+
+        Raises:
+            ValueError: If required variables are missing
+        """
+        # If no variables are required and none are provided, return the template as-is
+        if not self.variables and not kwargs:
+            self._filled_prompt = self.template
+            self._context = {}
+            return self.template
+
+        missing_vars = self.get_missing_variables(**kwargs)
+        if missing_vars:
+            raise ValueError(
+                f"Missing required variables for prompt '{self.name}': {missing_vars}. "
+                f"Required: {self.variables}, Provided: {set(kwargs.keys())}"
+            )
+
+        # Only process the template if there are actually variables to replace
+        if self.variables:
+            result = self.template
+            for key, value in kwargs.items():
+                if key in self.variables:  # Only replace known variables
+                    placeholder = f"{{{{{key}}}}}"  # {{key}}
+                    result = result.replace(placeholder, str(value))
+        else:
+            result = self.template
+
+        # Cache the filled result
+        self._filled_prompt = result
+        self._context = kwargs
+
+        return result
+
+    @property
+    def prompt(self) -> str:
+        """Get the filled prompt if available, otherwise return the template.
+
+        This property provides backward compatibility.
+        """
+        return self._filled_prompt if self._filled_prompt is not None else self.template
+
+    def __str__(self) -> str:
+        """String representation returns the filled prompt or template."""
+        return self.prompt
+
+
+class PromptManager:
+    """Singleton class to manage prompt templates and JSON schemas"""
+
+    _instance: Optional['PromptManager'] = None
+    _initialized: bool = False
+    _prompt_path: Path
+    _caching: bool
+
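+    # Classic singleton: PromptManager() always returns the same shared instance.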
" + f"Required: {self.variables}, Provided: {set(kwargs.keys())}" + ) + + # Only process the template if there are actually variables to replace + if self.variables: + result = self.template + for key, value in kwargs.items(): + if key in self.variables: # Only replace known variables + placeholder = f"{{{{{key}}}}}" # {{key}} + result = result.replace(placeholder, str(value)) + else: + result = self.template + + # Cache the filled result + self._filled_prompt = result + self._context = kwargs + + return result + + @property + def prompt(self) -> str: + """Get the filled prompt if available, otherwise return the template. + + This property provides backward compatibility. + """ + return self._filled_prompt if self._filled_prompt else self.template + + def __str__(self) -> str: + """String representation returns the filled prompt or template.""" + return self.prompt + + +class PromptManager: + """Singleton class to manage prompt templates and JSON schemas""" + + _instance: Optional['PromptManager'] = None + _initialized: bool = False + _prompt_path: Path + _caching: bool + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if not self._initialized: + self.prompts: Dict[str, str] = {} + self.schemas: Dict[str, Dict[str, Any]] = {} + self.prompt_variables: Dict[str, Set[str]] = {} + self._caching = True # Enable caching by default + PromptManager._initialized = True + + def _load_prompt(self, prompt_name: str) -> bool: + """Load a specific prompt and its schema on-demand. + + Args: + prompt_name: Name of the prompt to load + + Returns: + True if prompt was loaded successfully, False otherwise + """ + # If caching is enabled and prompt already loaded, skip + if self._caching and prompt_name in self.prompts: + return True + + prompts_dir = self._get_path() + + if not prompts_dir.exists(): + logger.warning(f"Prompts directory not found: {prompts_dir}") + return False + + md_file = prompts_dir / f"{prompt_name}.md" + + if not md_file.exists(): + logger.debug(f"Prompt file not found: {md_file}") + return False + + try: + # Load prompt template + with open(md_file, 'r', encoding='utf-8') as f: + content = f.read().strip() + + # Extract variables from {{variable}} patterns + variables = self._extract_variables(content) + + # Store in cache + self.prompts[prompt_name] = content + self.prompt_variables[prompt_name] = variables + + # Look for corresponding JSON schema file + schema_file = md_file.with_suffix('.json') + if schema_file.exists(): + try: + with open(schema_file, 'r', encoding='utf-8') as f: + schema = json.load(f) + self.schemas[prompt_name] = schema + logger.debug(f"Loaded prompt '{prompt_name}' with schema and variables: {variables}") + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON schema in {schema_file}: {e}") + else: + logger.debug(f"Loaded prompt '{prompt_name}' (no schema) with variables: {variables}") + + return True + + except Exception as e: + logger.error(f"Error loading prompt file {md_file}: {e}") + return False + + def _get_path(self) -> Path: + """Get the prompts directory path. + + Returns the configured path if set via configure(), + otherwise defaults to 'prompts' in the current working directory. 
+ """ + if hasattr(self, '_prompt_path') and self._prompt_path: + return self._prompt_path + + # Default to 'prompts' directory in the current working directory + return Path.cwd() / 'prompts' + + + def _extract_variables(self, template: str) -> Set[str]: + """Extract all {{variable}} placeholders from template""" + pattern = r'\{\{(\w+)\}\}' + variables = set(re.findall(pattern, template)) + return variables + + def _validate_context(self, prompt_name: str, context: Dict[str, Any]) -> None: + """Validate that all required variables are provided""" + if prompt_name not in self.prompt_variables: + raise ValueError(f"Unknown prompt: '{prompt_name}'") + + required_vars = self.prompt_variables[prompt_name] + provided_vars = set(context.keys()) + + missing_vars = required_vars - provided_vars + if missing_vars: + raise ValueError( + f"Missing required variables for prompt '{prompt_name}': {missing_vars}. " + f"Required: {required_vars}, Provided: {provided_vars}" + ) + + # Warn about extra variables (not an error, but might indicate mistakes) + extra_vars = provided_vars - required_vars + if extra_vars: + logger.warning(f"Extra variables provided for prompt '{prompt_name}': {extra_vars}") + + def _fill_template(self, template: str, context: Dict[str, Any]) -> str: + """Fill template with context variables""" + result = template + + for key, value in context.items(): + placeholder = f"{{{{{key}}}}}" # {{key}} + result = result.replace(placeholder, str(value)) + + return result + + @classmethod + def configure(cls, path: Optional[Path] = None, caching: Optional[bool] = None): + """Configure the PromptManager settings. + + Args: + path: Custom path to prompts directory + caching: Whether to cache loaded prompts (default: True) + """ + instance = cls() + if path is not None: + instance._prompt_path = path + # Clear cache when path changes + instance.prompts.clear() + instance.schemas.clear() + instance.prompt_variables.clear() + if caching is not None: + instance._caching = caching + # If disabling cache, clear existing cached prompts + if not caching: + instance.prompts.clear() + instance.schemas.clear() + instance.prompt_variables.clear() + + @classmethod + def get_prompt(cls, prompt_name: str, **kwargs) -> PromptResult: + """ + Get a PromptResult that can be filled with variables. 
+
+        Args:
+            prompt_name: Name of the prompt template (filename without .md)
+            **kwargs: Optional variables to pre-fill the template
+
+        Returns:
+            PromptResult object with smart fill/validate methods
+
+        Raises:
+            ValueError: If the prompt doesn't exist
+
+        Examples:
+            # Get the unfilled template
+            result = PromptManager.get_prompt('greeting')
+            print(result.variables)  # See required variables
+            filled = result.fill(name='Alice', age=30)
+
+            # Or pre-fill on retrieval
+            result = PromptManager.get_prompt('greeting', name='Alice', age=30)
+            print(result.prompt)  # Already filled
+        """
+        instance = cls()
+
+        # Try to load the prompt if not already loaded
+        if not instance._load_prompt(prompt_name):
+            raise ValueError(f"Prompt '{prompt_name}' not found")
+
+        # Get the template, variables and schema
+        template = instance.prompts[prompt_name]
+        variables = instance.prompt_variables.get(prompt_name, set())
+        schema = instance.schemas.get(prompt_name)
+
+        # Create the result object
+        result = PromptResult(
+            template=template,
+            name=prompt_name,
+            variables=variables,
+            schema=schema
+        )
+
+        # If kwargs were provided, pre-fill the template
+        if kwargs:
+            try:
+                result.fill(**kwargs)
+            except ValueError:
+                # If validation fails, return the unfilled result and let the caller handle it
+                pass
+
+        # If caching is disabled, clear the prompt after use
+        if not instance._caching:
+            del instance.prompts[prompt_name]
+            del instance.prompt_variables[prompt_name]
+            if prompt_name in instance.schemas:
+                del instance.schemas[prompt_name]
+
+        return result
+
+    @classmethod
+    def get_schema(cls, prompt_name: str) -> Optional[Dict[str, Any]]:
+        """
+        Get the JSON schema for a prompt if it exists
+
+        Args:
+            prompt_name: Name of the prompt template
+
+        Returns:
+            JSON schema dictionary or None if no schema exists; raises ValueError if the prompt is unknown
+        """
+        instance = cls()
+
+        # Try to load the prompt if not already loaded
+        if not instance._load_prompt(prompt_name):
+            raise ValueError(f"Prompt '{prompt_name}' not found")
+
+        return instance.schemas.get(prompt_name)
+
+    @classmethod
+    def has_schema(cls, prompt_name: str) -> bool:
+        """Check if a prompt has a JSON schema"""
+        instance = cls()
+        # Try to load the prompt if not already loaded
+        instance._load_prompt(prompt_name)
+        return prompt_name in instance.schemas
+
+    @classmethod
+    def get_prompt_with_schema(cls, prompt_name: str, **kwargs) -> PromptResult:
+        """
+        Get both the processed prompt and its schema (if available)
+
+        This is now just an alias for get_prompt(), since get_prompt() already returns a PromptResult.
+        Kept for backward compatibility.
+ + Args: + prompt_name: Name of the prompt template + **kwargs: Variables to fill in the template + + Returns: + PromptResult object containing prompt, schema, variables, and name + """ + return cls.get_prompt(prompt_name, **kwargs) + + @classmethod + def list_prompts(cls) -> Dict[str, Dict[str, Any]]: + """ + List all available prompts with their info + + Returns: + Dictionary mapping prompt names to their info (variables, has_schema) + """ + instance = cls() + prompts_dir = instance._get_path() + result = {} + + if not prompts_dir.exists(): + return result + + # Scan for all .md files in the prompts directory + for md_file in prompts_dir.glob("*.md"): + prompt_name = md_file.stem + # Load prompt to get its details + if instance._load_prompt(prompt_name): + result[prompt_name] = { + 'variables': instance.prompt_variables[prompt_name], + 'has_schema': prompt_name in instance.schemas, + 'variable_count': len(instance.prompt_variables[prompt_name]) + } + + return result + + @classmethod + def reload_prompts(cls): + """Clear the cache to force reloading of prompts on next access""" + if cls._instance: + cls._instance.prompts.clear() + cls._instance.schemas.clear() + cls._instance.prompt_variables.clear() + logger.info("Prompt cache cleared") + + @classmethod + def get_prompt_info(cls, prompt_name: str) -> Dict[str, Any]: + """ + Get detailed information about a specific prompt + + Returns: + Dictionary with prompt template, schema, and required variables + """ + instance = cls() + + # Try to load the prompt if not already loaded + if not instance._load_prompt(prompt_name): + raise ValueError(f"Prompt '{prompt_name}' not found") + + info = { + 'name': prompt_name, + 'template': instance.prompts[prompt_name], + 'variables': instance.prompt_variables[prompt_name], + 'variable_count': len(instance.prompt_variables[prompt_name]), + 'has_schema': prompt_name in instance.schemas + } + + if prompt_name in instance.schemas: + info['schema'] = instance.schemas[prompt_name] + + return info diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..36475ce --- /dev/null +++ b/uv.lock @@ -0,0 +1,8 @@ +version = 1 +revision = 3 +requires-python = ">=3.13" + +[[package]] +name = "llmutils" +version = "0.1.0" +source = { editable = "." }