from fastapi import FastAPI
from pydantic import BaseModel
from transformers import LlamaForCausalLM, LlamaTokenizer  # THIS LINE IS IMPORTANT
import torch

app = FastAPI()

# Load the model and tokenizer.
# The Auto* classes would also work here:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# model = AutoModelForCausalLM.from_pretrained("memorease/memorease-quizgen")
# tokenizer = AutoTokenizer.from_pretrained("memorease/memorease-quizgen")

# Load the Llama model and tokenizer directly.
model = LlamaForCausalLM.from_pretrained("memorease/memorease-quizgen")
tokenizer = LlamaTokenizer.from_pretrained("memorease/memorease-quizgen")

# Llama tokenizers ship without a pad token; set one so padding=True does not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


class Memory(BaseModel):
    description: str


@app.post("/generate")
def generate(memory: Memory):
    # "Soru üret" is Turkish for "generate a question".
    prompt = f"Soru üret: {memory.description}"
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True, max_length=128
    )
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model.generate(**inputs, max_new_tokens=64)
    # Decode only the newly generated tokens, not the echoed prompt.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    question = tokenizer.decode(generated, skip_special_tokens=True)
    return {"question": question}
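

# A minimal local-run sketch (assumptions: this file is saved as main.py, uvicorn is
# installed, and port 8000 is free; none of this is stated in the original).
# Start the server:
#   uvicorn main:app --reload
# Then exercise the endpoint, e.g. with curl:
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"description": "Istanbul was the capital of the Ottoman Empire"}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)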