# LivingAgents/llm_connector/llm.py

import json
from typing import Union, AsyncGenerator, List
import logging

import httpx

from .datatypes import LLMBackend, LLMMessage

logger = logging.getLogger(__name__)


async def get_response(
    backend: LLMBackend,
    messages: List[LLMMessage],
    stream: bool = False,
    *,
    timeout: float = 30.0,
) -> AsyncGenerator[Union[str, dict], None]:
    """Request a chat completion from ``{base_url}/chat/completions``.

    This function is an async generator in both modes, so consume it with
    ``async for`` regardless of ``stream``.

    Args:
        backend: Mapping read for ``model``, ``base_url`` and ``api_token``.
            An empty or ``None`` token means no ``Authorization`` header is
            sent.
        messages: Chat messages, forwarded unchanged as the ``messages``
            field of the JSON request body.
        stream: When true, the response is read line by line as
            ``data:``-prefixed JSON chunks and every non-empty delta is
            yielded as it arrives. When false, the whole completion is
            yielded as one item.
        timeout: Timeout in seconds handed to ``httpx.AsyncClient``.

    Yields:
        ``{'reasoning': str}`` for non-empty ``reasoning_content`` deltas and
        ``{'content': str}`` for non-empty content. If anything fails, the
        error is logged and a single empty string ``""`` is yielded instead
        of raising, so consumers should skip falsy items.
    """
    try:
        request_params = {
            "model": backend["model"],
            "messages": messages,
            "stream": stream,
        }

        headers = {"Content-Type": "application/json"}
        api_token = backend["api_token"]
        if api_token:
            headers["Authorization"] = f"Bearer {api_token}"

        # Tolerate a base URL configured with a trailing slash.
        url = f"{backend['base_url'].rstrip('/')}/chat/completions"

        # Deliberately log neither the headers (they carry the bearer token)
        # nor the messages (they carry the full prompt).
        logger.debug(
            "LLM request: url=%s model=%s stream=%s",
            url,
            request_params["model"],
            stream,
        )

        async with httpx.AsyncClient(timeout=timeout) as client:
            if stream:
                async with client.stream(
                    "POST",
                    url,
                    headers=headers,
                    json=request_params,
                ) as response:
                    if response.is_error:
                        # A streamed body is not loaded automatically. Read
                        # it now so the HTTPStatusError handler below can
                        # access ``e.response.text`` without raising
                        # ``httpx.ResponseNotRead``.
                        await response.aread()
                    response.raise_for_status()

                    async for raw_line in response.aiter_lines():
                        line = raw_line.strip()

                        # Only ``data:`` lines carry payload; the space after
                        # the colon is optional.
                        if not line.startswith("data:"):
                            continue
                        data = line[len("data:"):].strip()

                        # End-of-stream sentinel.
                        if data == "[DONE]":
                            break

                        try:
                            chunk_data = json.loads(data)
                        except json.JSONDecodeError:
                            # Skip malformed (or empty) chunks.
                            continue

                        if not isinstance(chunk_data, dict):
                            continue
                        choices = chunk_data.get("choices")
                        if not choices:
                            continue

                        # ``delta`` may be absent or explicitly null.
                        delta = choices[0].get("delta") or {}

                        # Reasoning content (for models that emit it).
                        reasoning = delta.get("reasoning_content")
                        if reasoning:
                            yield {"reasoning": reasoning}

                        # Regular content.
                        content = delta.get("content")
                        if content:
                            yield {"content": content}
            else:
                response = await client.post(
                    url,
                    headers=headers,
                    json=request_params,
                )
                response.raise_for_status()

                response_data = response.json()
                content = ""

                if "choices" in response_data and response_data["choices"]:
                    # ``message`` may be absent or explicitly null.
                    message = response_data["choices"][0].get("message") or {}
                    content = message.get("content", "")

                # Yield a dictionary so both modes produce the same item shape.
                if content:
                    yield {"content": content}

    except httpx.HTTPStatusError as e:
        logger.error(
            "HTTP error getting LLM response: %s - %s",
            e.response.status_code,
            e.response.text,
        )
        yield ""

    except httpx.RequestError as e:
        logger.error("Request error getting LLM response: %s", e)
        yield ""

    except Exception as e:
        # Last-resort boundary: keep the traceback in the log, and signal
        # failure to the consumer with the same empty-string sentinel.
        logger.exception("Error getting LLM response: %s", e)
        yield ""


async def _empty_async_generator() -> AsyncGenerator[str, None]:
    """Produce an async generator that completes without yielding anything."""
    return
    # Unreachable on purpose: the presence of ``yield`` in the body is what
    # makes Python compile this function as an async generator.
    yield ""