Spaces:

memorease
/

memorease-llm

Sleeping

File size: 1,446 Bytes

7efac98
8c2067d
ac2fa94
863d80b
5db876d
ac2fa94
5db876d
8c2067d
7efac98
8c2067d
7efac98
5db876d
863d80b
 
 
5db876d
 
 
 
 
 
 
863d80b
5db876d
7efac98
5db876d
8c2067d
863d80b
 
8c2067d
5db876d
863d80b
 
ac2fa94
5db876d
ac2fa94
 
 
 
5db876d
 
ac2fa94

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import json
import random

# Load the tokenizer and the causal language model once, at import time.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Load the stored few-shot examples. get_few_shot_prompt reads the
# "description" and "question" keys of each entry.
# The encoding is given explicitly so non-ASCII example text is decoded the
# same way on every platform instead of depending on the locale default.
with open("memory_questions.json", "r", encoding="utf-8") as f:
    memory_data = json.load(f)

# Build a few-shot prompt from the stored examples
def get_few_shot_prompt(memory, k=5):
    """Build a few-shot prompt that ends with an open ``Question:`` slot.

    Examples are drawn at random, without replacement, from the module-level
    ``memory_data`` and rendered as ``Memory: ...`` / ``Question: ...`` line
    pairs, followed by the caller's ``memory``.

    Args:
        memory: Text of the memory the model should ask a question about.
        k: Maximum number of examples to include. If fewer than ``k``
            examples are available, all of them are used.

    Returns:
        The prompt string; its last line is ``"Question:"``.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more examples than exist.
    sample_size = min(k, len(memory_data))
    examples = random.sample(memory_data, sample_size)
    few_shot = "\n".join(
        f"Memory: {ex['description']}\nQuestion: {ex['question']}"
        for ex in examples
    )
    return f"{few_shot}\nMemory: {memory}\nQuestion:"

# Main entry point
def generate_question(memory):
    """Generate a follow-up question for ``memory`` with the loaded model.

    A few-shot prompt is built with ``get_few_shot_prompt``, the model
    continues it with greedy decoding (``do_sample=False``) for at most 50
    new tokens, and the text produced for the prompt's open ``Question:``
    slot is returned.

    Args:
        memory: Text of the memory entered by the user.

    Returns:
        The generated question with surrounding whitespace removed. If no
        text precedes the first marker in the model's continuation, the whole
        stripped continuation is returned instead.
    """
    prompt = get_few_shot_prompt(memory)
    # Pass the full encoding so generate() receives the attention mask along
    # with the input ids.
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(**inputs, max_new_tokens=50, do_sample=False)

    # Decode only the tokens produced after the prompt. Splitting the whole
    # decoded text on "Question:" picks the wrong segment when the user's
    # memory contains that marker or the model keeps writing more
    # Memory/Question pairs.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )

    # Keep just the first answer: stop at the next marker the model may have
    # emitted while continuing the few-shot pattern.
    answer = completion
    for marker in ("Memory:", "Question:"):
        answer = answer.split(marker, 1)[0]
    answer = answer.strip()
    return answer if answer else completion.strip()

# Gradio interface: one text box for the memory, one for the generated question.
memory_box = gr.Textbox(label="Your Memory")
question_box = gr.Textbox(label="Generated Question")

iface = gr.Interface(
    title="MemoRease - TinyLLaMA Chat Generator",
    description="Write a memory, get a question to help recall more details!",
    fn=generate_question,
    inputs=memory_box,
    outputs=question_box,
)

# Start the web app.
iface.launch()