init
This commit is contained in:
174
src/main.py
Normal file
174
src/main.py
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
CensorBot - Data Sanitization Tool
|
||||
A NiceGUI-based application for removing sensitive customer information from text
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from nicegui import ui
|
||||
|
||||
from lib import get_response, LLMBackend, LLMMessage
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def get_random_example_text(examples_dir: str = "examples") -> str:
    """Return the contents of a randomly chosen ``.txt`` example file.

    Args:
        examples_dir: Directory to search for example files. Defaults to
            ``"examples"``, which is resolved relative to the current
            working directory.

    Returns:
        The full text of one randomly selected ``.txt`` file, decoded as
        UTF-8.

    Raises:
        FileNotFoundError: If ``examples_dir`` does not exist (raised by
            ``os.listdir``) or contains no regular ``.txt`` files.
    """
    # Only regular files qualify: a sub-directory whose name happens to end
    # in ".txt" would otherwise be selectable and make open() fail.
    # Sorting makes the choice reproducible under a seeded RNG, because
    # os.listdir returns entries in arbitrary order.
    txt_files = sorted(
        name
        for name in os.listdir(examples_dir)
        if name.endswith(".txt")
        and os.path.isfile(os.path.join(examples_dir, name))
    )

    if not txt_files:
        raise FileNotFoundError("No .txt files found in examples directory")

    file_path = os.path.join(examples_dir, random.choice(txt_files))

    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()
|
||||
|
||||
|
||||
async def main():
    """Build the CensorBot page and wire up its event handlers.

    Reads the system prompt from ``src/prompt.md`` and the LLM backend
    settings from the ``BACKEND_BASE_URL``, ``BACKEND_API_TOKEN`` and
    ``BACKEND_MODEL`` environment variables, then creates the header, the
    input and output cards, the action buttons, the statistics panel and
    the footer.

    Raises:
        FileNotFoundError: If ``src/prompt.md`` or an example text cannot be
            found.
        KeyError: If one of the backend environment variables is not set.
    """
    input_text: ui.textarea
    output_text: ui.textarea

    prompt: str

    # Explicit encoding so the prompt reads the same regardless of locale.
    with open('src/prompt.md', encoding='utf-8') as prompt_file:
        prompt = prompt_file.read()

    backend: LLMBackend = {'base_url': os.environ['BACKEND_BASE_URL'],
                           'api_token': os.environ['BACKEND_API_TOKEN'],
                           'model': os.environ['BACKEND_MODEL']}

    async def censor_input():
        """Stream the censored version of the input text into the output box."""
        messages: List[LLMMessage] = [
            {'role': 'system', 'content': prompt},
            {'role': 'user', 'content': input_text.value},
        ]

        # Start from a clean slate so repeated runs do not concatenate results.
        output_text.value = ''
        copy_button.disable()

        try:
            # Stream the response. Cancellation arrives as CancelledError at
            # an await point; polling Task.cancelled() from inside the running
            # task would always be False, so no explicit check is made.
            async for chunk in get_response(backend, messages, True):  # type: ignore
                if 'content' in chunk:
                    output_text.value += chunk['content']

                # Small delay to allow UI updates and give cancellation a
                # point at which to be delivered
                await asyncio.sleep(0.01)

        except asyncio.CancelledError:
            ui.notify('Generation stopped by user', type='info')
            # Keep whatever content has been streamed so far
            return

        # There is now something to copy, so make the copy button usable.
        if output_text.value:
            copy_button.enable()

    # Application header
    with ui.header(elevated=True).classes('q-pa-md'):
        ui.label('🔒 CensorBot').classes('text-h4 text-weight-bold')
        ui.label('Secure Data Sanitization for IT Service Companies').classes('text-subtitle1 text-grey-7')

    # Main container
    with ui.column().classes('w-full max-w-6xl mx-auto q-pa-lg q-gutter-md'):

        # Input section
        with ui.card().classes('w-full'):
            ui.label('Original Text').classes('text-h6 text-weight-medium')
            ui.label('Contains sensitive customer information').classes('text-caption text-grey-7')

            # Fetched once so the counter below can reflect the prefilled text.
            initial_text = get_random_example_text()

            input_text = ui.textarea(
                placeholder='Paste your text here...\n\n'
                            'Example:\n'
                            'Customer John Smith called from 555-1234 about issue with account john@example.com',
                value=initial_text
            ).classes('w-full').style('font-family: monospace').props('autogrow')

            # Character count, initialised from the prefilled example
            char_count_label = ui.label(f'{len(initial_text)} characters').classes('text-caption text-grey-6')

        # Output section
        with ui.card().classes('w-full'):
            ui.label('Censored Text').classes('text-h6 text-weight-medium')
            ui.label('Safe to use with external LLMs').classes('text-caption text-green-7')

            output_text = ui.textarea(
                placeholder='Censored text will appear here...\n\n'
                            'Example:\n'
                            'Customer [CUSTOMER_NAME] called from [PHONE_NUMBER] about issue with account [EMAIL]',
                value=''
            ).classes('w-full').style('font-family: monospace; background-color: #f5f5f5').props('readonly autogrow')

            # Copy button (disabled until there is output to copy)
            with ui.row().classes('w-full justify-end q-gutter-sm'):
                copy_button = ui.button('Copy to Clipboard', icon='content_copy').props('outline')
                copy_button.disable()

        # Action buttons
        with ui.card().classes('w-full'):
            with ui.row().classes('w-full justify-center q-gutter-md'):
                clear_button = ui.button('Clear All', icon='clear').props('outline color=negative')
                ui.button('Censor Data', icon='shield', on_click=censor_input).props('color=primary size=lg')

        # Statistics section
        with ui.expansion('Processing Statistics', icon='analytics').classes('w-full'):
            with ui.row().classes('w-full q-gutter-md'):
                with ui.column().classes('col'):
                    ui.label('Items Censored').classes('text-weight-medium')
                    stats_censored = ui.label('0').classes('text-h4 text-primary')

                with ui.column().classes('col'):
                    ui.label('Processing Time').classes('text-weight-medium')
                    stats_time = ui.label('0.0s').classes('text-h4 text-primary')

                with ui.column().classes('col'):
                    ui.label('Data Reduction').classes('text-weight-medium')
                    stats_reduction = ui.label('0%').classes('text-h4 text-primary')

    # Event handlers (copy and statistics are still mockups)
    def update_char_count():
        """Refresh the character counter from the current input text."""
        # Guard against a None value so len() cannot fail.
        char_count_label.text = f'{len(input_text.value or "")} characters'

    def mock_copy():
        """Show a confirmation toast; no clipboard access happens yet."""
        ui.notify('Text copied to clipboard (mockup)', type='positive')

    def clear_all():
        """Reset both text areas, the copy button and all statistics."""
        input_text.value = ''
        output_text.value = ''
        copy_button.disable()
        stats_censored.text = '0'
        stats_time.text = '0.0s'
        stats_reduction.text = '0%'
        update_char_count()

    # Connect event handlers
    input_text.on('input', update_char_count)
    copy_button.on_click(mock_copy)
    clear_button.on_click(clear_all)

    # Footer
    with ui.footer().classes('q-pa-md text-center'):
        ui.label('CensorBot - Protecting Customer Privacy').classes('text-caption text-grey-6')
        ui.label('⚠️ This is a mockup - no actual processing implemented yet').classes('text-caption text-orange')
|
||||
|
||||
|
||||
# Run the application
|
||||
# Start the web server when run as a script; "__mp_main__" is accepted as
# well so the guard also passes in a multiprocessing child import.
if __name__ in ("__main__", "__mp_main__"):

    @ui.page('/')
    async def index_page():
        """Serve the CensorBot UI at the site root."""
        await main()

    # Server and window settings, gathered in one place before launch.
    run_options = dict(
        title='CensorBot - Data Sanitization Tool',
        favicon='🔒',
        show=False,
        dark=False,
        port=8080,
    )
    ui.run(**run_options)
|
||||
43
src/prompt.md
Normal file
43
src/prompt.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# Data Censoring Instructions
|
||||
|
||||
You are a data sanitization assistant. Your sole purpose is to identify and replace sensitive customer information with appropriate placeholders while maintaining the context and meaning of the text.
|
||||
|
||||
## What to Censor
|
||||
|
||||
Replace the following types of sensitive information:
|
||||
|
||||
1. **Personal Names**: Replace with `[NAME]` or `[CUSTOMER_NAME]`
|
||||
2. **Email Addresses**: Replace with `[EMAIL]`
|
||||
3. **Phone Numbers**: Replace with `[PHONE]`
|
||||
4. **Physical Addresses**: Replace with `[ADDRESS]`
|
||||
5. **Social Security Numbers**: Replace with `[SSN]`
|
||||
6. **Credit Card Numbers**: Replace with `[CREDIT_CARD]`
|
||||
7. **Bank Account Numbers**: Replace with `[ACCOUNT_NUMBER]`
|
||||
8. **Driver's License Numbers**: Replace with `[LICENSE]`
|
||||
9. **Passport Numbers**: Replace with `[PASSPORT]`
|
||||
10. **Medical Record Numbers**: Replace with `[MRN]`
|
||||
11. **IP Addresses**: Replace with `[IP_ADDRESS]`
|
||||
12. **Usernames/User IDs**: Replace with `[USERNAME]`
|
||||
13. **Passwords**: Replace with `[PASSWORD]`
|
||||
14. **Company Names** (when context indicates it's customer data): Replace with `[COMPANY]`
|
||||
15. **Dates of Birth**: Replace with `[DOB]`
|
||||
|
||||
## Rules
|
||||
|
||||
1. **Preserve Context**: Keep all non-sensitive text exactly as provided
|
||||
2. **Maintain Structure**: Preserve formatting, punctuation, and spacing
|
||||
3. **Be Consistent**: Use the same placeholder for the same entity throughout the text
|
||||
4. **No Commentary**: Output ONLY the censored text, no explanations or additional text
|
||||
5. **When in Doubt**: If something might be sensitive, censor it
|
||||
|
||||
## Example
|
||||
|
||||
Input:
|
||||
"John Smith from Acme Corp called at 555-1234 about his account john.smith@acme.com. His credit card ending in 4567 was declined."
|
||||
|
||||
Output:
|
||||
"[CUSTOMER_NAME] from [COMPANY] called at [PHONE] about his account [EMAIL]. His credit card ending in [CREDIT_CARD] was declined."
|
||||
|
||||
## Your Task
|
||||
|
||||
Censor the text in the user's message by replacing all sensitive information with appropriate placeholders. Output only the censored version.
|
||||
Reference in New Issue
Block a user