import json
import logging
from typing import Any, AsyncGenerator, Dict, List, Optional, Union

import httpx

from .datatypes import LLMBackend, LLMMessage

logger = logging.getLogger(__name__)

# One piece of model output: {'content': ...} or {'reasoning': ...}.
LLMChunk = Dict[str, str]

# Field name that introduces a payload line in a server-sent-events stream.
_SSE_DATA_PREFIX = "data:"
# Payload that marks the end of the stream.
_SSE_DONE = "[DONE]"


def _build_headers(backend: LLMBackend) -> Dict[str, str]:
    """Return the request headers, adding a bearer token when one is set."""
    headers = {"Content-Type": "application/json"}
    # Truthiness (rather than len()) also treats a None token as "no auth".
    api_token = backend["api_token"]
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"
    return headers


def _sse_payload(line: str) -> Optional[str]:
    """Return the payload of an SSE ``data:`` line, or None for other lines."""
    line = line.strip()
    if not line.startswith(_SSE_DATA_PREFIX):
        return None
    # The space after "data:" is optional, so strip it instead of requiring it.
    return line[len(_SSE_DATA_PREFIX):].strip()


def _chunks_from_stream_payload(data: str) -> List[LLMChunk]:
    """Convert one streamed JSON payload into the chunks it carries."""
    try:
        parsed = json.loads(data)
    except json.JSONDecodeError:
        # Malformed chunks are skipped rather than aborting the stream.
        return []
    if not isinstance(parsed, dict):
        return []

    choices = parsed.get("choices")
    if not choices:
        return []
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return []

    # "delta" may be absent or null; both mean there is nothing to emit.
    delta = first_choice.get("delta") or {}
    chunks: List[LLMChunk] = []
    # Reasoning content (for models that support it) goes before regular text.
    if delta.get("reasoning_content"):
        chunks.append({"reasoning": delta["reasoning_content"]})
    if delta.get("content"):
        chunks.append({"content": delta["content"]})
    return chunks


def _content_from_completion(payload: Any) -> str:
    """Return the first choice's message content, or "" when there is none."""
    if not isinstance(payload, dict):
        return ""
    choices = payload.get("choices")
    if not choices:
        return ""
    # "message" may be absent or null; both mean there is no content.
    message = choices[0].get("message") or {}
    return message.get("content") or ""


async def get_response(
    backend: LLMBackend,
    messages: List[LLMMessage],
    stream: bool = False,
) -> AsyncGenerator[Union[LLMChunk, str], None]:
    """Request a chat completion and yield the model output as chunks.

    This is an async generator in both modes, so it must be consumed with
    ``async for`` even when ``stream`` is False.

    Args:
        backend: Mapping read for "model", "base_url" and "api_token".
        messages: Conversation sent as the request's "messages" field.
        stream: When True, the response is read as server-sent events and
            chunks are yielded as they arrive. When False, the complete
            response is fetched and yielded as a single chunk.

    Yields:
        ``{'content': text}`` for regular output and ``{'reasoning': text}``
        for reasoning output. After any failure a single empty string is
        yielded instead; the error itself is logged, not raised.
    """
    try:
        request_params = {
            "model": backend["model"],
            "messages": messages,
            "stream": stream,
        }
        headers = _build_headers(backend)
        # Log metadata only: the headers hold the bearer token and the
        # messages hold the conversation, so neither is written out.
        logger.debug(
            "LLM request: model=%s stream=%s messages=%d",
            request_params["model"],
            stream,
            len(messages),
        )
        # Strip a trailing slash so the URL never contains "//chat".
        url = f"{str(backend['base_url']).rstrip('/')}/chat/completions"

        async with httpx.AsyncClient(timeout=30.0) as client:
            if stream:
                async with client.stream(
                    "POST",
                    url,
                    headers=headers,
                    json=request_params,
                ) as response:
                    # A streamed body is not loaded automatically. Read it
                    # on failure so the error handler can use response.text.
                    if response.is_error:
                        await response.aread()
                    response.raise_for_status()

                    async for line in response.aiter_lines():
                        data = _sse_payload(line)
                        if data is None:
                            continue
                        if data == _SSE_DONE:
                            break
                        for chunk in _chunks_from_stream_payload(data):
                            yield chunk
            else:
                response = await client.post(
                    url,
                    headers=headers,
                    json=request_params,
                )
                response.raise_for_status()
                content = _content_from_completion(response.json())
                # Yield a dictionary so both modes produce the same shape.
                if content:
                    yield {"content": content}

    except httpx.HTTPStatusError as e:
        logger.error(
            "HTTP error getting LLM response: %s - %s",
            e.response.status_code,
            e.response.text,
        )
        yield ""
    except httpx.RequestError as e:
        logger.error("Request error getting LLM response: %s", e)
        yield ""
    except Exception as e:
        # Boundary handler: callers receive "" instead of an exception.
        logger.error("Error getting LLM response: %s", e)
        yield ""


async def _empty_async_generator() -> AsyncGenerator[str, None]:
    """Return an async generator that finishes without yielding anything."""
    # The unreachable yield is what makes this function an async generator.
    if False:
        yield ""