diff --git a/src/components/model_info.py b/src/components/model_info.py
index 1f1c815..163419e 100644
--- a/src/components/model_info.py
+++ b/src/components/model_info.py
@@ -55,18 +55,18 @@ class ModelInfoComponent(AsyncCard):
         with ui.tab_panel(basic_tab).classes('p-4'):
             with ui.scroll_area().classes('w-full').style('height: 400px; max-height: 60vh'):
                 with ui.column().classes('gap-4'):
-                    if 'license' in model_info:
-                        with ui.row().classes('items-start gap-4'):
-                            ui.label('License:').classes('text-sm font-bold text-white min-w-24')
-                            ui.label(model_info['license']).classes('text-sm text-grey-5 flex-1')
+                    if 'system' in model_info:
+                        ui.label('System Prompt:').classes('text-sm font-bold text-white mb-2')
+                        ui.html(f'<pre>{model_info["system"]}</pre>').classes('w-full')
                     if 'template' in model_info:
                         ui.label('Template:').classes('text-sm font-bold text-white mb-2')
                         ui.html(f'<pre>{model_info["template"]}</pre>').classes('w-full')
-                    if 'system' in model_info:
-                        ui.label('System Prompt:').classes('text-sm font-bold text-white mb-2')
-                        ui.html(f'<pre>{model_info["system"]}</pre>').classes('w-full')
+                    if 'license' in model_info:
+                        with ui.row().classes('items-start gap-4'):
+                            ui.label('License:').classes('text-sm font-bold text-white min-w-24')
+                            ui.label(model_info['license']).classes('text-sm text-grey-5 flex-1')
 
         # Parameters Tab
         if has_params and params_tab:
diff --git a/src/components/ollama_model_creation.py b/src/components/ollama_model_creation.py
index f56b234..b37d827 100644
--- a/src/components/ollama_model_creation.py
+++ b/src/components/ollama_model_creation.py
@@ -2,7 +2,7 @@ from nicegui import ui, binding
 from niceguiasyncelement import AsyncCard
 from pathlib import Path
 from utils import ollama
-from typing import Optional, Dict
+from typing import Optional
 
 modelfile_example = """FROM qwen2.5-coder:7b
 PARAMETER num_ctx 8192
@@ -14,14 +14,65 @@ SYSTEM "Du bist ein Python-Experte."
 class OllamaModelCreationComponent(AsyncCard):
     model_name = binding.BindableProperty()
     model_from = binding.BindableProperty()
-    parameters = binding.BindableProperty()
+    system_message = binding.BindableProperty()
     quantize = binding.BindableProperty()
+    template = binding.BindableProperty()
+    show_advanced = binding.BindableProperty()
+
+    # Parameter toggles
+    use_temperature = binding.BindableProperty()
+    use_top_k = binding.BindableProperty()
+    use_top_p = binding.BindableProperty()
+    use_min_p = binding.BindableProperty()
+    use_num_ctx = binding.BindableProperty()
+    use_num_predict = binding.BindableProperty()
+    use_repeat_last_n = binding.BindableProperty()
+    use_repeat_penalty = binding.BindableProperty()
+    use_seed = binding.BindableProperty()
+    use_stop = binding.BindableProperty()
+
+    # Parameter values
+    temperature = binding.BindableProperty()
+    top_k = binding.BindableProperty()
+    top_p = binding.BindableProperty()
+    min_p = binding.BindableProperty()
+    num_ctx = binding.BindableProperty()
+    num_predict = binding.BindableProperty()
+    repeat_last_n = binding.BindableProperty()
+    repeat_penalty = binding.BindableProperty()
+    seed = binding.BindableProperty()
+    stop = binding.BindableProperty()
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.is_downloading = False
         self.download_progress = 0
         self.download_status = ''
+        self.show_advanced = False
+
+        # Initialize parameter defaults
+        self.temperature = 0.8
+        self.top_k = 40
+        self.top_p = 0.9
+        self.min_p = 0.0
+        self.num_ctx = 4096
+        self.num_predict = -1
+        self.repeat_last_n = 64
+        self.repeat_penalty = 1.1
+        self.seed = 0
+        self.stop = ""
+
+        # Initialize toggles (all off by default)
+        self.use_temperature = False
+        self.use_top_k = False
+        self.use_top_p = False
+        self.use_min_p = False
+        self.use_num_ctx = False
+        self.use_num_predict = False
+        self.use_repeat_last_n = False
+        self.use_repeat_penalty = False
+        self.use_seed = False
+        self.use_stop = False
 
     async def build(self) -> None:
         self.classes('w-full')
@@ -29,57 +80,195 @@ class OllamaModelCreationComponent(AsyncCard):
         with ui.column().classes('w-full gap-4'):
             ui.label('Create Model').classes('text-xl font-bold')
-            ui.input('Model Name', value='qwen2.5-coder-32k-python:latest').props('outlined dense').classes('w-full').bind_value(self, 'model_name')
-            ui.input('From', value='qwen2.5-coder:7b').props('outlined dense').classes('w-full').bind_value(self, 'model_from')
-            ui.textarea(placeholder='Parameters').classes('w-full').props('autogrow').bind_value(self, 'parameters')
+            # Basic fields
+            ui.input('Model Name', value='my-custom-model:latest').props('outlined dense').classes('w-full').bind_value(self, 'model_name')
+            ui.input('Base Model', value='llama3.2:3b').props('outlined dense').classes('w-full').bind_value(self, 'model_from')
-            ui.select(['q4_K_M', 'q4_K_S', 'q8_0'], label='quantize', clearable=True).props('outlined dense').classes('w-full').bind_value(self, 'quantize')
+            # System message field (commonly used)
+            ui.textarea('System Message', placeholder='You are a helpful assistant...').classes('w-full').props('autogrow outlined').bind_value(self, 'system_message')
+            # Parameters section
+            ui.label('Parameters').classes('text-md font-medium mt-2 mb-3')
+
+            # Generation Parameters
+            with ui.expansion('Generation', icon='tune').classes('w-full mb-2'):
+                with ui.column().classes('w-full gap-3 pt-2'):
+                    # Temperature
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_temperature')
+                        ui.label('Temperature').classes('min-w-fit')
+                        ui.slider(min=0.0, max=2.0, step=0.1).classes('flex-1').bind_value(self, 'temperature').bind_enabled_from(self, 'use_temperature')
+                        ui.label().bind_text_from(self, 'temperature', backward=lambda x: f'{x:.1f}').classes('text-xs text-gray-500 min-w-fit')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('The temperature of the model. Higher values (e.g., 1.2) make output more creative, lower values (e.g., 0.5) more focused. Default: 0.8')
+
+                    # Top K
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_top_k')
+                        ui.label('Top K').classes('min-w-fit')
+                        ui.number(value=40, min=1, max=200).classes('flex-1').bind_value(self, 'top_k').bind_enabled_from(self, 'use_top_k')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Reduces probability of generating nonsense. Higher values (e.g., 100) give more diverse answers, lower values (e.g., 10) are more conservative. Default: 40')
+
+                    # Top P
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_top_p')
+                        ui.label('Top P').classes('min-w-fit')
+                        ui.slider(min=0.0, max=1.0, step=0.05).classes('flex-1').bind_value(self, 'top_p').bind_enabled_from(self, 'use_top_p')
+                        ui.label().bind_text_from(self, 'top_p', backward=lambda x: f'{x:.2f}').classes('text-xs text-gray-500 min-w-fit')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Works with top-k. Higher values (e.g., 0.95) lead to more diverse text, lower values (e.g., 0.5) generate more focused text. Default: 0.9')
+
+                    # Min P
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_min_p')
+                        ui.label('Min P').classes('min-w-fit')
+                        ui.slider(min=0.0, max=1.0, step=0.01).classes('flex-1').bind_value(self, 'min_p').bind_enabled_from(self, 'use_min_p')
+                        ui.label().bind_text_from(self, 'min_p', backward=lambda x: f'{x:.2f}').classes('text-xs text-gray-500 min-w-fit')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Alternative to top_p. Minimum probability for a token relative to the most likely token. Default: 0.0')
+
+            # Context Parameters
+            with ui.expansion('Context', icon='memory').classes('w-full mb-2'):
+                with ui.column().classes('w-full gap-3 pt-2'):
+                    # Context Length
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_num_ctx')
+                        ui.label('Context Length').classes('min-w-fit')
+                        ui.number(value=4096, min=1, max=32768).classes('flex-1').bind_value(self, 'num_ctx').bind_enabled_from(self, 'use_num_ctx')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Size of the context window used to generate the next token. Default: 4096')
+
+                    # Max Tokens
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_num_predict')
+                        ui.label('Max Tokens').classes('min-w-fit')
+                        ui.number(value=-1, min=-1, max=4096).classes('flex-1').bind_value(self, 'num_predict').bind_enabled_from(self, 'use_num_predict')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Maximum number of tokens to predict. -1 for infinite generation. Default: -1')
+
+            # Repetition Parameters
+            with ui.expansion('Repetition Control', icon='repeat').classes('w-full mb-2'):
+                with ui.column().classes('w-full gap-3 pt-2'):
+                    # Repeat Last N
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_repeat_last_n')
+                        ui.label('Repeat Last N').classes('min-w-fit')
+                        ui.number(value=64, min=-1, max=512).classes('flex-1').bind_value(self, 'repeat_last_n').bind_enabled_from(self, 'use_repeat_last_n')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('How far back the model looks to prevent repetition. 0=disabled, -1=num_ctx. Default: 64')
+
+                    # Repeat Penalty
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_repeat_penalty')
+                        ui.label('Repeat Penalty').classes('min-w-fit')
+                        ui.slider(min=0.5, max=2.0, step=0.1).classes('flex-1').bind_value(self, 'repeat_penalty').bind_enabled_from(self, 'use_repeat_penalty')
+                        ui.label().bind_text_from(self, 'repeat_penalty', backward=lambda x: f'{x:.1f}').classes('text-xs text-gray-500 min-w-fit')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('How strongly to penalize repetitions. Higher values (e.g., 1.5) penalize more, lower values (e.g., 0.9) are more lenient. Default: 1.1')
+
+            # Control Parameters
+            with ui.expansion('Control', icon='settings').classes('w-full mb-2'):
+                with ui.column().classes('w-full gap-3 pt-2'):
+                    # Seed
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_seed')
+                        ui.label('Seed').classes('min-w-fit')
+                        ui.number(value=0, min=0, max=999999).classes('flex-1').bind_value(self, 'seed').bind_enabled_from(self, 'use_seed')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Random number seed for generation. Same seed produces same output for same prompt. Default: 0')
+
+                    # Stop Sequences
+                    with ui.row().classes('items-center gap-3 w-full'):
+                        ui.switch().bind_value(self, 'use_stop')
+                        ui.label('Stop Sequence').classes('min-w-fit')
+                        ui.input(placeholder='AI assistant:').classes('flex-1').bind_value(self, 'stop').bind_enabled_from(self, 'use_stop')
+                        ui.icon('info', size='sm').classes('text-gray-500 cursor-help').tooltip('Text pattern where the model stops generating. Default: none')
+
+            # Advanced section (collapsible)
+            with ui.expansion('Advanced Settings', icon='settings').classes('w-full').bind_value(self, 'show_advanced'):
+                with ui.column().classes('w-full gap-4 pt-2'):
+                    # Quantization
+                    ui.select(['q4_K_M', 'q4_K_S', 'q8_0'],
+                              label='Quantization', clearable=True).props('outlined dense').classes('w-full').bind_value(self, 'quantize')
+
+                    # Template field
+                    ui.textarea('Template',
+                                placeholder='{{ if .System }}<|im_start|>system\n{{ .System }}<|im_end|>\n{{ end }}...').classes('w-full').props('autogrow outlined').bind_value(self, 'template')
+
+            # Status and progress
             with ui.row().classes('items-center gap-2'):
                 ui.icon('check_circle').props(f'color=positive').bind_visibility_from(self, 'download_status', backward=lambda x: True if x == 'success' else False)
                 self.status_label = ui.label().bind_text_from(self, 'download_status')
             ui.linear_progress(value=0, show_value=False).props('buffer=0.0 animation-speed=0').bind_value_from(self, 'download_progress')
-            self.create_btn = ui.button('Create Model', on_click=self.create_model).props('color=primary').classes('w-full').bind_enabled_from(self, 'model_id', backward=lambda x: bool(x) and not self.is_downloading)
+
+            # Create button
+            self.create_btn = ui.button('Create Model', icon='add', on_click=self.create_model).props('color=primary').classes('w-full').bind_enabled_from(self, 'model_name', backward=lambda x: bool(x) and not self.is_downloading)
 
     async def create_model(self):
-        self.parameters = self.parameters.strip()
-        model_parameters: Optional[Dict[str, str | int | float]] = None
-        if self.parameters:
-            model_parameters = {}
-            for line in self.parameters.split('\n'):
-                line = line.strip()
-                try:
-                    key, value = line.split(' ')
-                except:
-                    ui.notify(f'Not a valid format. {line}')
-                    return
-                if key in ['num_ctx', 'repeat_last_n', 'seed', 'num_predict', 'top_k']:
-                    model_parameters[key] = int(value)
-                elif key in ['repeat_penalty', 'temperature', 'top_p', 'min_p']:
-                    model_parameters[key] = float(value)
-                elif key == 'stop':
-                    model_parameters[key] = value.strip()
-                else:
-                    ui.notify(f'Unknown parameter: {key}')
-                    return
+        # Validate required fields
+        if not self.model_name or not self.model_name.strip():
+            ui.notify('Model name is required', type='negative')
+            return
+
+        if not self.model_from or not self.model_from.strip():
+            ui.notify('Base model is required', type='negative')
+            return
+
+        # Build parameters from toggleable controls
+        model_parameters = {}
+
+        # Only include parameters that are enabled
+        if self.use_temperature:
+            model_parameters['temperature'] = float(self.temperature)
+        if self.use_top_k:
+            model_parameters['top_k'] = int(self.top_k)
+        if self.use_top_p:
+            model_parameters['top_p'] = float(self.top_p)
+        if self.use_min_p:
+            model_parameters['min_p'] = float(self.min_p)
+        if self.use_num_ctx:
+            model_parameters['num_ctx'] = int(self.num_ctx)
+        if self.use_num_predict:
+            model_parameters['num_predict'] = int(self.num_predict)
+        if self.use_repeat_last_n:
+            model_parameters['repeat_last_n'] = int(self.repeat_last_n)
+        if self.use_repeat_penalty:
+            model_parameters['repeat_penalty'] = float(self.repeat_penalty)
+        if self.use_seed:
+            model_parameters['seed'] = int(self.seed)
+        if self.use_stop and self.stop.strip():
+            model_parameters['stop'] = self.stop.strip()
+
+        # If no parameters are enabled, set to None
+        if not model_parameters:
+            model_parameters = None
 
         self.create_btn.set_enabled(False)
+        self.download_status = 'Preparing...'
+        self.download_progress = 0
+
         try:
-            async for chunk in ollama.create_ollama_model(self.model_name, self.model_from, model_parameters, self.quantize):
+            # Use the updated create_ollama_model function
+            async for chunk in ollama.create_ollama_model(
+                self.model_name.strip(),
+                self.model_from.strip(),
+                parameters=model_parameters,
+                system=self.system_message.strip() if self.system_message else None,
+                template=self.template.strip() if self.template else None,
+                quantizie=self.quantize if self.quantize else None
+            ):
                 if chunk.strip():
                     # Parse the JSON chunk and extract content
                     import json
                     try:
                         chunk_data = json.loads(chunk)
-                        self.download_status = chunk_data['status']
+                        self.download_status = chunk_data.get('status', 'Processing...')
                         if 'total' in chunk_data and 'completed' in chunk_data:
                             self.download_progress = chunk_data['completed'] / chunk_data['total']
-                            print(self.download_progress)
                         else:
                             self.download_progress = 0
                     except json.JSONDecodeError:
                         pass  # Skip malformed chunks
+
+            # Success
+            self.download_status = 'success'
+            self.download_progress = 1.0
+            ui.notify(f'Model "{self.model_name}" created successfully!', type='positive')
+
         except Exception as e:
-            ui.notify(f'Error: {str(e)}', type='negative')
+            self.download_status = f'Error: {str(e)}'
+            ui.notify(f'Error creating model: {str(e)}', type='negative')
         finally:
            self.create_btn.set_enabled(True)
diff --git a/src/utils/ollama.py b/src/utils/ollama.py
index 8c32384..3ac9571 100644
--- a/src/utils/ollama.py
+++ b/src/utils/ollama.py
@@ -32,13 +32,16 @@ async def active_models(url='http://127.0.0.1:11434'):
         return response.json()["models"]
 
 
-async def create_ollama_model(model_name, model_from, parameters=None, quantizie=None, url='http://127.0.0.1:11434'):
+async def create_ollama_model(model_name, model_from, parameters=None, system=None, template=None, quantizie=None, url='http://127.0.0.1:11434'):
     data = {
         "model": model_name,
         "from": model_from,
         "stream": True
     }
-
+    if system:
+        data['system'] = system
+    if template:
+        data['template'] = template
     if parameters:
         data['parameters'] = parameters
     if quantizie:
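
Usage note: for reference, a minimal sketch of how the updated create_ollama_model signature can be called outside the component, mirroring what OllamaModelCreationComponent now sends. The model names and parameter values are illustrative, an Ollama server is assumed at the default http://127.0.0.1:11434, and the quantizie keyword keeps its existing spelling from src/utils/ollama.py:

    import asyncio
    import json

    from utils import ollama


    async def main():
        # Pass only the options the user actually enabled; everything else
        # stays None, mirroring how the component builds its call.
        async for chunk in ollama.create_ollama_model(
            'my-custom-model:latest',  # name of the model to create (illustrative)
            'llama3.2:3b',             # base model, sent as "from" (illustrative)
            parameters={'temperature': 0.7, 'num_ctx': 8192},
            system='You are a helpful assistant.',
            template=None,             # None keeps the base model's template
            quantizie=None,            # existing keyword spelling in ollama.py
        ):
            if chunk.strip():
                try:
                    # Each chunk is one JSON status line from the create endpoint
                    print(json.loads(chunk).get('status', ''))
                except json.JSONDecodeError:
                    pass  # skip malformed chunks, as the component does


    asyncio.run(main())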