# LivingAgents/llm_connector/llm.py

import json
from typing import Union, AsyncGenerator, List, Optional, Dict, Any
import logging

import httpx

from .datatypes import LLMBackend, LLMMessage

logger = logging.getLogger(__name__)


class LLMClient:
    """Client for interacting with LLM APIs"""
    backend: LLMBackend
    embedding_backend: LLMBackend
    timeout: float

    def __init__(self, backend: LLMBackend, embedding_backend: Optional[LLMBackend], timeout: float = 30.0):
        """Initialize the LLM client

        Args:
            backend: LLM backend configuration containing base_url, api_token, and model
        """
        self.backend = backend
        self.embedding_backend = embedding_backend if embedding_backend else backend
        self.timeout = timeout

    async def get_embedding(self, text: str) -> List[float]:
        """Get embedding for text

        Args:
            text: Text to get embedding for

        Returns:
            List of float values representing the embedding vector
        """
        try:
            # Use provided model or fall back to backend model

            request_params = {
                "model": self.embedding_backend["model"],
                "prompt": text
            }

            headers = {
                "Content-Type": "application/json"
            }
            if len(self.embedding_backend["api_token"]):
                headers['Authorization'] = f"Bearer {self.embedding_backend['api_token']}"

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.embedding_backend['base_url']}/embeddings"

                response = await client.post(
                    url,
                    headers=headers,
                    json=request_params,
                )
                response.raise_for_status()

                response_data = response.json()

                # Extract embedding from response
                if "embedding" in response_data and response_data["embedding"]:
                    return response_data["embedding"]
                else:
                    logger.error("No embedding data in response")
                    return []

        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error getting embedding: {e.response.status_code} - {e.response.text}")
            return []

        except httpx.RequestError as e:
            logger.error(f"Request error getting embedding: {str(e)}")
            return []

        except Exception as e:
            logger.error(f"Error getting embedding: {str(e)}")
            return []

    async def get_response(self, messages: List[LLMMessage], stream: Optional[bool]) -> AsyncGenerator[dict[str, Any] | str, Any]:
        """Get response from the LLM

        Args:
            messages: List of messages to send to the LLM
            stream: Whether to stream responses by default

        Returns:
            Either a string response or an async generator for streaming
        """
        try:

            stream = stream if stream else False

            # Prepare the request parameters
            request_params = {
                "model": self.backend["model"],
                "messages": messages,
                "stream": stream,
            }
            # Prepare headers
            headers = {
                "Content-Type": "application/json"
            }
            if len(self.backend["api_token"]):
                headers['Authorization'] = f"Bearer {self.backend['api_token']}"

            logger.info(headers)
            logger.info(request_params)

            # Create httpx client
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.backend['base_url']}/chat"

                if stream:
                    # Stream the response
                    async with client.stream(
                            "POST",
                            url,
                            headers=headers,
                            json=request_params,
                    ) as response:
                        response.raise_for_status()

                        async for line in response.aiter_lines():
                            line = line.strip()

                            # Skip empty lines and non-data lines
                            if not line or not line.startswith("data: "):
                                continue

                            # Remove "data: " prefix
                            data = line[6:]

                            # Check for stream end
                            if data == "[DONE]":
                                break

                            try:
                                # Parse JSON chunk
                                chunk_data = json.loads(data)

                                if "choices" in chunk_data and chunk_data["choices"]:
                                    choice = chunk_data["choices"][0]
                                    delta = choice.get("delta", {})

                                    # Handle reasoning content (for models that support it)
                                    if "reasoning_content" in delta and delta["reasoning_content"]:
                                        yield {'reasoning': delta["reasoning_content"]}  # type: ignore

                                    # Handle regular content
                                    if "content" in delta and delta["content"]:
                                        yield {'content': delta["content"]}  # type: ignore

                            except json.JSONDecodeError:
                                # Skip malformed JSON chunks
                                continue
                else:
                    # Non-streaming response
                    response = await client.post(
                        url,
                        headers=headers,
                        json=request_params,
                    )
                    response.raise_for_status()

                    response_data = response.json()
                    content = ""

                    # if "message" in response_data and response_data["message"]:
                    #    content = response_data["message"][0]['content']
                    content = response_data["message"]['content']
                    logger.info(response_data)
                    # FIX: Yield as dictionary to match streaming format
                    if content:
                        logger.info(content)
                        yield {'content': content}  # type: ignore

        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP error getting LLM response: {e.response.status_code} - {e.response.text}"
            logger.error(error_msg)
            yield ""

        except httpx.RequestError as e:
            error_msg = f"Request error getting LLM response: {str(e)}"
            logger.error(error_msg)
            yield ""

        except Exception as e:
            error_msg = f"Error getting LLM response: {str(e)}"
            logger.error(error_msg)
            yield ""

    async def get_structured_response(self, messages: List[LLMMessage], json_format: Dict[str, Any]) -> Dict[str, Any]:
        """Get structured JSON response from the LLM using a JSON schema

        Args:
            messages: List of messages to send to the LLM
            json_format: JSON schema for structured output

        Returns:
            Parsed JSON response as dictionary

        Raises:
            ValueError: If the response is not valid JSON
            HTTPError: If the API request fails
        """
        try:
            # Prepare the request parameters with format
            request_params = {
                "model": self.backend["model"],
                "messages": messages,
                "format": json_format,  # Ollama's structured output parameter
                "stream": False,
            }

            # Prepare headers
            headers = {
                "Content-Type": "application/json"
            }
            if len(self.backend["api_token"]):
                headers['Authorization'] = f"Bearer {self.backend['api_token']}"

            logger.info("Structured request headers: %s", headers)
            logger.info("Structured request params: %s", request_params)

            # Create httpx client
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.backend['base_url']}/chat"

                # Non-streaming response only
                response = await client.post(
                    url,
                    headers=headers,
                    json=request_params,
                )
                response.raise_for_status()

                response_data = response.json()
                logger.info("Structured response data: %s", response_data)

                # Extract content from response
                if "message" not in response_data or not response_data["message"]:
                    raise ValueError("No message in response")

                content = response_data["message"].get('content', '')

                if not content:
                    raise ValueError("Empty content in structured response")

                # Parse JSON content
                try:
                    structured_data = json.loads(content)
                    logger.info("Parsed structured data: %s", structured_data)
                    return structured_data

                except json.JSONDecodeError as e:
                    logger.error("Failed to parse structured response as JSON: %s", content)
                    raise ValueError(f"Response is not valid JSON: {e}")

        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP error getting structured LLM response: {e.response.status_code} - {e.response.text}"
            logger.error(error_msg)
            raise

        except httpx.RequestError as e:
            error_msg = f"Request error getting structured LLM response: {str(e)}"
            logger.error(error_msg)
            raise

        except Exception as e:
            error_msg = f"Error getting structured LLM response: {str(e)}"
            logger.error(error_msg)
            raise


async def _empty_async_generator() -> AsyncGenerator[str, None]:
    """Helper function for empty async generator"""
    if False:
        yield ""