Spaces:
Running
Running
import gradio as gr | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
import re | |
# Checkpoint identifier shared by the tokenizer and the model below.
model_name = "valhalla/t5-base-e2e-qg"

# Both objects are created once at import time and reused for every request.
# use_fast=False is passed explicitly to the tokenizer loader.
tokenizer = T5Tokenizer.from_pretrained(
    model_name,
    use_fast=False,
)
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
)
# Sentence boundary: any run of whitespace (spaces, tabs, newlines) that
# follows terminal punctuation.  The lookbehind leaves the punctuation
# attached to the sentence it ends.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def _split_sentences(paragraph):
    """Return the non-empty, stripped sentences of *paragraph*, in order."""
    pieces = _SENTENCE_BOUNDARY.split(paragraph.strip())
    return [piece.strip() for piece in pieces if piece.strip()]


def _unique_questions(raw_output):
    """Split decoded model output on ``<sep>``; drop empties and repeats."""
    candidates = (chunk.strip() for chunk in raw_output.split("<sep>"))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(q for q in candidates if q))


def generate_qas(paragraph):
    """Generate question/answer pairs for every sentence of *paragraph*.

    Each sentence is sent to the module-level ``model`` (via ``tokenizer``)
    with the ``generate questions:`` prefix.  The decoded output is split on
    ``<sep>`` into individual questions, de-duplicated per sentence, and each
    question is paired with the sentence it came from as its answer.

    Args:
        paragraph: Free text to process.  ``None`` or an empty/whitespace-only
            string yields an empty result without invoking the model.

    Returns:
        A single string of ``"Q: ...\\nA: ..."`` entries separated by blank
        lines, or ``""`` when there is nothing to process.
    """
    # Guard against None / empty input so the UI never surfaces an
    # AttributeError from calling .strip() on a missing value.
    if not paragraph:
        return ""

    qas = []
    for sentence in _split_sentences(paragraph):
        input_text = f"generate questions: {sentence}"
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        outputs = model.generate(
            input_ids,
            max_length=128,
            num_beams=4,
            do_sample=False,
        )
        raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        qas.extend(
            f"Q: {question}\nA: {sentence}"
            for question in _unique_questions(raw_output)
        )
    return "\n\n".join(qas)
# Assemble the UI pieces separately, wire them to generate_qas, then serve.
paragraph_input = gr.Textbox(label="Enter a paragraph", lines=8)
qa_output = gr.Textbox(label="Generated Questions and Answers")

demo = gr.Interface(
    fn=generate_qas,
    inputs=paragraph_input,
    outputs=qa_output,
    title="🧠 Multi Q&A Generator",
    description="Generates diverse question-answer pairs for each sentence using T5.",
)
demo.launch()