Spaces:

memorease
/

memorease-llm

Running

File size: 1,034 Bytes

7efac98
 
ac2fa94
 
7efac98
 
ac2fa94
7efac98
 
 
 
 
 
 
 
 
 
 
 
 
ac2fa94
7efac98
ac2fa94
 
 
 
7efac98
ac2fa94

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr

# Checkpoint name of the LLaMA 2 chat model, passed to both loaders below.
model_id = "meta-llama/Llama-2-7b-chat-hf"

# Load the tokenizer and the causal language model for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # Original author's note: float32 can be used instead when running on CPU.
    torch_dtype=torch.float16,
)

def generate_question(memory):
    """Generate a follow-up question that helps the user recall a memory.

    Args:
        memory: Free-text description of a memory, as entered by the user.

    Returns:
        The generated question as a stripped string, or an empty string when
        ``memory`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to ask about: skip the expensive generation call entirely.
    if not memory or not memory.strip():
        return ""

    prompt = f"[INST] You are a helpful assistant. Based on this memory, generate a question that would help the user recall more details:\n\nMemory: {memory}\n\nQuestion: [/INST]"

    # The model is loaded with device_map="auto", so its weights may not be on
    # the CPU; the input tensors must live on the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)

    # The returned sequence starts with the prompt tokens. Decode only the
    # newly generated tail so the prompt (including its trailing "[/INST]"
    # marker) never leaks into the answer.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # If the continuation repeats the "Question:" label, keep what follows it.
    return result.split("Question:")[-1].strip()

# User interface: one text box for the memory, one for the generated question.
memory_box = gr.Textbox(label="Your Memory")
question_box = gr.Textbox(label="Generated Question")

iface = gr.Interface(
    title="LLaMA Chat Question Generator",
    fn=generate_question,
    inputs=memory_box,
    outputs=question_box,
)

# Start the Gradio app.
iface.launch()