too much

2025-09-01 06:43:11 +02:00
parent bde3fc0df9
commit 45eb2b8bc5
38 changed files with 3424 additions and 915 deletions
--- a/llm_connector/llm.py
+++ b/llm_connector/llm.py
@@ -1,5 +1,5 @@
 import json
-from typing import Union, AsyncGenerator, List
+from typing import Union, AsyncGenerator, List, Optional, Dict, Any
 import logging

 import httpx
@@ -9,75 +9,49 @@ from .datatypes import LLMBackend, LLMMessage
 logger = logging.getLogger(__name__)


-async def get_response(backend: LLMBackend, messages: List[LLMMessage], stream: bool = False) -> Union[str, AsyncGenerator[str, None]]:
+class LLMClient:
+    """Client for interacting with LLM APIs"""
+    # Backend configuration used for chat requests.
+    backend: LLMBackend
+    # Backend configuration used for embedding requests; __init__ sets it to
+    # `backend` when no separate embedding backend is supplied.
+    embedding_backend: LLMBackend
+    # Timeout value passed to every httpx.AsyncClient this client creates.
+    timeout: float

-    try:
-        # Prepare the request parameters
-        request_params = {
-            "model": backend["model"],
-            "messages": messages,
-            "stream": stream,
-        }
-        # Prepare headers
-        headers = {
-            "Content-Type": "application/json"
-        }
-        if len(backend["api_token"]):
-            # Prepare headers
-            headers['Authorization'] = f"Bearer {backend['api_token']}"
+    def __init__(self, backend: LLMBackend, embedding_backend: Optional[LLMBackend] = None, timeout: float = 30.0):
+        """Initialize the LLM client

-        print(request_params)
-        print(headers)
+        Args:
+            backend: LLM backend configuration containing base_url, api_token, and model
+            embedding_backend: Backend configuration used for embedding requests;
+                when None (or otherwise falsy) ``backend`` is used instead
+            timeout: Timeout value passed to each httpx.AsyncClient created by
+                this client (default 30.0)
+        """
+        self.backend = backend
+        # Fall back to the chat backend so embedding calls always have a target.
+        self.embedding_backend = embedding_backend if embedding_backend else backend
+        self.timeout = timeout

-        # Create httpx client
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            url = f"{backend['base_url']}/chat/completions"
+    async def get_embedding(self, text: str) -> List[float]:
+        """Get embedding for text

-            if stream:
-                # Stream the response
-                async with client.stream(
-                    "POST",
-                    url,
-                    headers=headers,
-                    json=request_params,
-                ) as response:
-                    response.raise_for_status()
+        Args:
+            text: Text to get embedding for. The model always comes from the
+                embedding backend configuration; there is no per-call override.

-                    async for line in response.aiter_lines():
-                        line = line.strip()
+        Returns:
+            List of float values representing the embedding vector
+        """
+        try:
+            # The model is taken from the embedding backend configuration

-                        # Skip empty lines and non-data lines
-                        if not line or not line.startswith("data: "):
-                            continue
+            request_params = {
+                "model": self.embedding_backend["model"],
+                "prompt": text
+            }

-                        # Remove "data: " prefix
-                        data = line[6:]
+            headers = {
+                "Content-Type": "application/json"
+            }
+            if len(self.embedding_backend["api_token"]):
+                headers['Authorization'] = f"Bearer {self.embedding_backend['api_token']}"

-                        # Check for stream end
-                        if data == "[DONE]":
-                            break
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                url = f"{self.embedding_backend['base_url']}/embeddings"

-                        try:
-                            # Parse JSON chunk
-                            chunk_data = json.loads(data)
-
-                            if "choices" in chunk_data and chunk_data["choices"]:
-                                choice = chunk_data["choices"][0]
-                                delta = choice.get("delta", {})
-
-                                # Handle reasoning content (for models that support it)
-                                if "reasoning_content" in delta and delta["reasoning_content"]:
-                                    yield {'reasoning': delta["reasoning_content"]}  # type: ignore
-
-                                # Handle regular content
-                                if "content" in delta and delta["content"]:
-                                    yield {'content': delta["content"]}  # type: ignore
-
-                        except json.JSONDecodeError:
-                            # Skip malformed JSON chunks
-                            continue
-            else:
-                # Non-streaming response
                response = await client.post(
                    url,
                    headers=headers,
@@ -86,30 +60,220 @@ async def get_response(backend: LLMBackend, messages: List[LLMMessage], stream:
                response.raise_for_status()

                response_data = response.json()
-                content = ""

-                if "choices" in response_data and response_data["choices"]:
-                    message = response_data["choices"][0].get("message", {})
-                    content = message.get("content", "")
+                # Extract embedding from response
+                if "embedding" in response_data and response_data["embedding"]:
+                    return response_data["embedding"]
+                else:
+                    logger.error("No embedding data in response")
+                    return []

-                # FIX: Yield as dictionary to match streaming format
-                if content:
-                    yield {'content': content}  # type: ignore
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error getting embedding: {e.response.status_code} - {e.response.text}")
+            return []

-    except httpx.HTTPStatusError as e:
-        error_msg = f"HTTP error getting LLM response: {e.response.status_code} - {e.response.text}"
-        logger.error(error_msg)
-        yield ""
+        except httpx.RequestError as e:
+            logger.error(f"Request error getting embedding: {str(e)}")
+            return []

-    except httpx.RequestError as e:
-        error_msg = f"Request error getting LLM response: {str(e)}"
-        logger.error(error_msg)
-        yield ""
+        except Exception as e:
+            logger.error(f"Error getting embedding: {str(e)}")
+            return []

-    except Exception as e:
-        error_msg = f"Error getting LLM response: {str(e)}"
-        logger.error(error_msg)
-        yield ""
+    async def get_response(self, messages: List[LLMMessage], stream: Optional[bool] = None) -> AsyncGenerator[dict[str, Any] | str, Any]:
+        """Yield the LLM's reply to ``messages`` from the backend's ``/chat`` endpoint
+
+        This is an async generator in both modes; consume it with ``async for``.
+        Successful items are dicts: ``{'content': ...}`` for answer text and
+        ``{'reasoning': ...}`` for reasoning text. Errors are never raised to
+        the caller: they are logged and an empty string ``""`` is yielded.
+
+        Args:
+            messages: List of messages to send to the LLM
+            stream: When truthy, request a streamed reply and yield one item per
+                received chunk; when falsy or None, yield the full reply once
+
+        Yields:
+            ``{'content': str}`` / ``{'reasoning': str}`` dicts, or ``""`` after
+            an error has been logged
+        """
+        try:
+            stream = bool(stream)
+
+            # Prepare the request parameters
+            request_params = {
+                "model": self.backend["model"],
+                "messages": messages,
+                "stream": stream,
+            }
+            # Prepare headers
+            headers = {
+                "Content-Type": "application/json"
+            }
+            # Truthiness (not len()) so a None token is treated like an empty one
+            api_token = self.backend["api_token"]
+            if api_token:
+                headers['Authorization'] = f"Bearer {api_token}"
+
+            # Never write the bearer token to the log
+            logger.info({
+                key: ("Bearer <redacted>" if key == "Authorization" else value)
+                for key, value in headers.items()
+            })
+            logger.info(request_params)
+
+            # Create httpx client
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                url = f"{self.backend['base_url']}/chat"
+
+                if stream:
+                    # Stream the response
+                    async with client.stream(
+                            "POST",
+                            url,
+                            headers=headers,
+                            json=request_params,
+                    ) as response:
+                        if response.is_error:
+                            # A streamed body is not loaded automatically; read it
+                            # so the HTTPStatusError handler below can access
+                            # e.response.text without raising ResponseNotRead.
+                            await response.aread()
+                        response.raise_for_status()
+
+                        async for line in response.aiter_lines():
+                            events, finished = self._parse_stream_line(line)
+                            for event in events:
+                                yield event  # type: ignore
+                            if finished:
+                                break
+                else:
+                    # Non-streaming response
+                    response = await client.post(
+                        url,
+                        headers=headers,
+                        json=request_params,
+                    )
+                    response.raise_for_status()
+
+                    response_data = response.json()
+                    logger.info(response_data)
+                    # A missing key raises here and is reported by the generic
+                    # handler below, which yields "" like every other failure.
+                    content = response_data["message"]['content']
+                    # Yield as dictionary to match streaming format
+                    if content:
+                        logger.info(content)
+                        yield {'content': content}  # type: ignore
+
+        except httpx.HTTPStatusError as e:
+            error_msg = f"HTTP error getting LLM response: {e.response.status_code} - {e.response.text}"
+            logger.error(error_msg)
+            yield ""
+
+        except httpx.RequestError as e:
+            error_msg = f"Request error getting LLM response: {str(e)}"
+            logger.error(error_msg)
+            yield ""
+
+        except Exception as e:
+            error_msg = f"Error getting LLM response: {str(e)}"
+            logger.error(error_msg)
+            yield ""
+
+    @staticmethod
+    def _parse_stream_line(line: str) -> tuple[list[dict[str, Any]], bool]:
+        """Decode one line of a streamed chat reply
+
+        Accepts both server-sent-event lines (``data: {...}`` carrying
+        ``choices[0].delta``, ended by ``data: [DONE]``) and plain JSON lines
+        carrying a ``message`` object and a ``done`` flag.
+
+        Args:
+            line: One raw line from the response stream
+
+        Returns:
+            ``(events, finished)``: the ``{'reasoning': ...}`` / ``{'content': ...}``
+            dicts found in the line (possibly none) and whether the stream ended
+        """
+        line = line.strip()
+        if not line:
+            return [], False
+
+        if line.startswith("data:"):
+            line = line[len("data:"):].strip()
+            if line == "[DONE]":
+                return [], True
+
+        try:
+            chunk = json.loads(line)
+        except json.JSONDecodeError:
+            # Skip malformed chunks and non-JSON lines (e.g. SSE comments)
+            return [], False
+        if not isinstance(chunk, dict):
+            return [], False
+
+        choices = chunk.get("choices")
+        if choices:
+            delta = choices[0].get("delta") or {}
+            reasoning = delta.get("reasoning_content")
+            content = delta.get("content")
+        else:
+            message = chunk.get("message") or {}
+            # NOTE(review): "thinking" is assumed to be the reasoning field of
+            # the plain-JSON format; confirm against the backend in use.
+            reasoning = message.get("thinking")
+            content = message.get("content")
+
+        events: list[dict[str, Any]] = []
+        if reasoning:
+            events.append({'reasoning': reasoning})
+        if content:
+            events.append({'content': content})
+        return events, bool(chunk.get("done"))
+
+    async def get_structured_response(self, messages: List[LLMMessage], json_format: Dict[str, Any]) -> Dict[str, Any]:
+        """Get structured JSON response from the LLM using a JSON schema
+
+        Sends one non-streaming request to the backend's ``/chat`` endpoint with
+        ``json_format`` as the ``format`` parameter, then parses the content of
+        the returned message as JSON. Every failure is logged and re-raised.
+
+        Args:
+            messages: List of messages to send to the LLM
+            json_format: JSON schema for structured output
+
+        Returns:
+            Parsed JSON response as dictionary
+
+        Raises:
+            ValueError: If the response has no message, its content is empty,
+                or the content is not valid JSON
+            httpx.HTTPStatusError: If the API answers with an error status
+            httpx.RequestError: If the request could not be completed
+        """
+        try:
+            # Prepare the request parameters with format
+            request_params = {
+                "model": self.backend["model"],
+                "messages": messages,
+                "format": json_format,  # Ollama's structured output parameter
+                "stream": False,
+            }
+
+            # Prepare headers
+            headers = {
+                "Content-Type": "application/json"
+            }
+            # Truthiness (not len()) so a None token is treated like an empty one
+            api_token = self.backend["api_token"]
+            if api_token:
+                headers['Authorization'] = f"Bearer {api_token}"
+
+            # Never write the bearer token to the log
+            loggable_headers = {
+                key: ("Bearer <redacted>" if key == "Authorization" else value)
+                for key, value in headers.items()
+            }
+            logger.info("Structured request headers: %s", loggable_headers)
+            logger.info("Structured request params: %s", request_params)
+
+            # Create httpx client
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                url = f"{self.backend['base_url']}/chat"
+
+                # Non-streaming response only
+                response = await client.post(
+                    url,
+                    headers=headers,
+                    json=request_params,
+                )
+                response.raise_for_status()
+
+                response_data = response.json()
+
+            logger.info("Structured response data: %s", response_data)
+
+            # Extract content from response
+            if "message" not in response_data or not response_data["message"]:
+                raise ValueError("No message in response")
+
+            content = response_data["message"].get('content', '')
+
+            if not content:
+                raise ValueError("Empty content in structured response")
+
+            # Parse JSON content
+            try:
+                structured_data = json.loads(content)
+            except json.JSONDecodeError as e:
+                logger.error("Failed to parse structured response as JSON: %s", content)
+                # Chain the decode error so the traceback keeps the real cause
+                raise ValueError(f"Response is not valid JSON: {e}") from e
+
+            logger.info("Parsed structured data: %s", structured_data)
+            return structured_data
+
+        except httpx.HTTPStatusError as e:
+            error_msg = f"HTTP error getting structured LLM response: {e.response.status_code} - {e.response.text}"
+            logger.error(error_msg)
+            raise
+
+        except httpx.RequestError as e:
+            error_msg = f"Request error getting structured LLM response: {str(e)}"
+            logger.error(error_msg)
+            raise
+
+        except Exception as e:
+            error_msg = f"Error getting structured LLM response: {str(e)}"
+            logger.error(error_msg)
+            raise


 async def _empty_async_generator() -> AsyncGenerator[str, None]: