import json
import logging
from typing import Union, AsyncGenerator, List, Optional, Dict, Any

import httpx

from .datatypes import LLMBackend, LLMMessage

logger = logging.getLogger(__name__)


class LLMClient:
    """Client for interacting with LLM APIs."""

    backend: LLMBackend
    embedding_backend: LLMBackend
    timeout: float

    def __init__(self, backend: LLMBackend, embedding_backend: Optional[LLMBackend], timeout: float = 30.0):
        """Initialize the LLM client.

        Args:
            backend: LLM backend configuration containing base_url, api_token, and model
            embedding_backend: Optional separate backend for embeddings; falls back to ``backend``
            timeout: Request timeout in seconds
        """
        self.backend = backend
        self.embedding_backend = embedding_backend if embedding_backend else backend
        self.timeout = timeout

    async def get_embedding(self, text: str) -> List[float]:
        """Get the embedding vector for a piece of text.

        Args:
            text: Text to embed

        Returns:
            List of floats representing the embedding vector, or an empty
            list if the request fails.
        """
        try:
            # Build the embedding request payload
            request_params = {
                "model": self.embedding_backend["model"],
                "prompt": text,
            }
            headers = {"Content-Type": "application/json"}
            if self.embedding_backend["api_token"]:
                headers["Authorization"] = f"Bearer {self.embedding_backend['api_token']}"

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.embedding_backend['base_url']}/embeddings"
                response = await client.post(url, headers=headers, json=request_params)
                response.raise_for_status()
                response_data = response.json()

                # Extract the embedding from the response
                if response_data.get("embedding"):
                    return response_data["embedding"]
                logger.error("No embedding data in response")
                return []

        except httpx.HTTPStatusError as e:
            logger.error("HTTP error getting embedding: %s - %s", e.response.status_code, e.response.text)
            return []
        except httpx.RequestError as e:
            logger.error("Request error getting embedding: %s", e)
            return []
        except Exception as e:
            logger.error("Error getting embedding: %s", e)
            return []

    async def get_response(
        self, messages: List[LLMMessage], stream: Optional[bool] = False
    ) -> AsyncGenerator[Union[Dict[str, Any], str], None]:
        """Get a response from the LLM.

        Args:
            messages: List of messages to send to the LLM
            stream: Whether to stream the response

        Yields:
            Dicts with a 'reasoning' or 'content' key; an empty string is
            yielded if an error occurs.
        """
        try:
            stream = bool(stream)
            # Prepare the request parameters
            request_params = {
                "model": self.backend["model"],
                "messages": messages,
                "stream": stream,
            }

            # Prepare headers; skip Authorization when no token is configured
            headers = {"Content-Type": "application/json"}
            if self.backend["api_token"]:
                headers["Authorization"] = f"Bearer {self.backend['api_token']}"

            # Do not log the headers: the Authorization header carries the API token
            logger.info("Request params: %s", request_params)

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.backend['base_url']}/chat"
                if stream:
                    # Stream the response as server-sent events
                    async with client.stream(
                        "POST",
                        url,
                        headers=headers,
                        json=request_params,
                    ) as response:
                        response.raise_for_status()
                        async for line in response.aiter_lines():
                            line = line.strip()
                            # Skip empty lines and non-data lines
                            if not line or not line.startswith("data: "):
                                continue
                            # Remove the "data: " prefix
                            data = line[6:]
                            # Check for end of stream
                            if data == "[DONE]":
                                break
                            try:
                                # Parse the JSON chunk
                                chunk_data = json.loads(data)
                                if chunk_data.get("choices"):
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    # Reasoning content (for models that support it)
                                    if delta.get("reasoning_content"):
                                        yield {"reasoning": delta["reasoning_content"]}
                                    # Regular content
                                    if delta.get("content"):
                                        yield {"content": delta["content"]}
                            except json.JSONDecodeError:
                                # Skip malformed JSON chunks
                                continue
                else:
                    # Non-streaming response
                    response = await client.post(url, headers=headers, json=request_params)
                    response.raise_for_status()
                    response_data = response.json()
                    logger.info("Response data: %s", response_data)

                    # Yield as a dict to match the streaming format
                    content = response_data.get("message", {}).get("content", "")
                    if content:
                        yield {"content": content}

        except httpx.HTTPStatusError as e:
            logger.error("HTTP error getting LLM response: %s - %s", e.response.status_code, e.response.text)
            yield ""
        except httpx.RequestError as e:
            logger.error("Request error getting LLM response: %s", e)
            yield ""
        except Exception as e:
            logger.error("Error getting LLM response: %s", e)
            yield ""

    async def get_structured_response(
        self, messages: List[LLMMessage], json_format: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Get a structured JSON response from the LLM using a JSON schema.

        Args:
            messages: List of messages to send to the LLM
            json_format: JSON schema for structured output

        Returns:
            Parsed JSON response as a dictionary

        Raises:
            ValueError: If the response is missing, empty, or not valid JSON
            httpx.HTTPStatusError: If the API returns an error status
            httpx.RequestError: If the request fails
        """
        try:
            # Prepare the request parameters, including the output format
            request_params = {
                "model": self.backend["model"],
                "messages": messages,
                "format": json_format,  # Ollama's structured output parameter
                "stream": False,
            }

            headers = {"Content-Type": "application/json"}
            if self.backend["api_token"]:
                headers["Authorization"] = f"Bearer {self.backend['api_token']}"

            # Do not log the headers: the Authorization header carries the API token
            logger.info("Structured request params: %s", request_params)

            async with httpx.AsyncClient(timeout=self.timeout) as client:
                url = f"{self.backend['base_url']}/chat"
                # Non-streaming response only
                response = await client.post(url, headers=headers, json=request_params)
                response.raise_for_status()
                response_data = response.json()
                logger.info("Structured response data: %s", response_data)

                # Extract content from the response
                if not response_data.get("message"):
                    raise ValueError("No message in response")

                content = response_data["message"].get("content", "")
                if not content:
                    raise ValueError("Empty content in structured response")

                # Parse the JSON content
                try:
                    structured_data = json.loads(content)
                    logger.info("Parsed structured data: %s", structured_data)
                    return structured_data
                except json.JSONDecodeError as e:
                    logger.error("Failed to parse structured response as JSON: %s", content)
                    raise ValueError(f"Response is not valid JSON: {e}") from e

        except httpx.HTTPStatusError as e:
            logger.error("HTTP error getting structured LLM response: %s - %s", e.response.status_code, e.response.text)
            raise
        except httpx.RequestError as e:
            logger.error("Request error getting structured LLM response: %s", e)
            raise
        except Exception as e:
            logger.error("Error getting structured LLM response: %s", e)
            raise


async def _empty_async_generator() -> AsyncGenerator[str, None]:
    """An async generator that yields nothing.

    The unreachable ``yield`` is what makes this function an async
    generator rather than a plain coroutine.
    """
    if False:
        yield ""
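

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the public API). The backend
# values below are hypothetical: the keys mirror the LLMBackend fields this
# module reads ("base_url", "api_token", "model"), and the messages assume
# LLMMessage is a role/content mapping. A compatible server must be running
# at base_url for the calls to succeed.
# ---------------------------------------------------------------------------
async def _demo_usage() -> None:
    backend: LLMBackend = {
        "base_url": "http://localhost:11434/api",  # assumed Ollama-style endpoint
        "api_token": "",  # empty token -> no Authorization header is sent
        "model": "llama3",  # hypothetical model name
    }
    client = LLMClient(backend, embedding_backend=None)

    # Streaming chat: chunks arrive as {'reasoning': ...} or {'content': ...} dicts
    async for chunk in client.get_response(
        [{"role": "user", "content": "Hello"}], stream=True
    ):
        if isinstance(chunk, dict) and "content" in chunk:
            print(chunk["content"], end="", flush=True)

    # Structured output: constrain the reply with a JSON schema
    schema: Dict[str, Any] = {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }
    result = await client.get_structured_response(
        [{"role": "user", "content": "Reply with JSON."}], schema
    )
    print(result.get("answer"))

# To try the sketch, import asyncio and call asyncio.run(_demo_usage())
# from a context where this package is importable.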