from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import gradio as gr

# Load the Qwen2.5-0.5B-Instruct base model and attach the ThinkLite LoRA
# adapter on top of it; device_map="auto" lets accelerate place the weights
# on whatever hardware is available (GPU if present, otherwise CPU).
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, "Locon213/ThinkLite")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
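
# Optional: the LoRA weights can be folded back into the base model so that
# generation skips the adapter indirection. A minimal sketch using PEFT's
# merge_and_unload(); it is left commented out to preserve the original
# behaviour, and whether the speed-up matters at 0.5B scale is an assumption,
# not something this script claims.
# model = model.merge_and_unload()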

# Sampling settings shared by every request: nucleus + top-k sampling with a
# mild repetition penalty, capped at 512 new tokens per reply.
generation_config = GenerationConfig(
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    max_new_tokens=512,
    repetition_penalty=1.1,
    do_sample=True
)


def format_prompt(message, history):
    """Flatten the Gradio chat history (a list of (user, assistant) pairs)
    into a single prompt string using custom <<<USER>>> / <<<ASSISTANT>>>
    markers, ending with an open assistant turn for the model to complete."""
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"<<<USER>>> {user_msg}\n<<<ASSISTANT>>> {bot_msg}\n"
    prompt += f"<<<USER>>> {message}\n<<<ASSISTANT>>>"
    return prompt
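

# Alternative worth noting: if the adapter were trained on Qwen's native chat
# template rather than the custom markers above, the tokenizer's built-in
# formatting could be used instead. A hedged sketch, unused by the app below;
# the message-dict shape is the standard transformers convention, not
# something this script defines.
def format_prompt_with_template(message, history):
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )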


def generate_response(message, history):
    """Gradio callback: build the prompt, generate, and return only the
    newly generated text."""
    formatted_prompt = format_prompt(message, history)

    # Tokenize and move the tensors to the same device as the model.
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        generation_config=generation_config,
        pad_token_id=tokenizer.eos_token_id
    )

    # model.generate() returns prompt + completion, so slice off the prompt
    # tokens before decoding.
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    return response.strip()
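

# If token-by-token output is wanted in the UI, gr.ChatInterface also accepts
# a generator callback. A minimal sketch using transformers'
# TextIteratorStreamer, layered on top of this script as an assumption rather
# than part of the original: generation runs in a background thread while the
# streamer yields decoded text as it arrives. To try it, pass
# fn=generate_response_streaming to gr.ChatInterface below.
from threading import Thread
from transformers import TextIteratorStreamer


def generate_response_streaming(message, history):
    inputs = tokenizer(format_prompt(message, history),
                       return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(target=model.generate, kwargs=dict(
        **inputs,
        generation_config=generation_config,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    ))
    thread.start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial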


# Wire the callback into Gradio's ready-made chat UI.
chat_interface = gr.ChatInterface(
    fn=generate_response,
    examples=[
        "Explain quantum entanglement in simple terms",
        "How do I learn to program?",
        "Write a poem about AI"
    ],
    title="ThinkLite Chat",
    description="Chat with ThinkLite, an adapted version of Qwen2.5-0.5B-Instruct",
    theme="soft"
)

if __name__ == "__main__":
    chat_interface.launch()