import gradio as gr
from transformers import pipeline, set_seed
import re
import unicodedata
import numpy as np
import pandas as pd
import os
import torch
# Check GPU availability (for debugging)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected. Running on CPU.")
# Set seed for reproducibility
set_seed(42)
# Define the six premium generation models:
premium_models = [
    "Qwen/Qwen2.5-Omni-7B",
    "Qwen/Qwen2.5-VL-7B-Instruct",
    "deepseek-ai/Janus-Pro-7B",
    "meta-llama/Llama-2-7b-hf",
    "Alibaba-NLP/gte-Qwen2-7B-instruct",
    "HuggingFaceH4/zephyr-7b-beta"
]
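# Note: some of these checkpoints are multimodal (Qwen2.5-Omni/VL, Janus-Pro) or
# embedding-oriented (gte-Qwen2-7B-instruct) and may not load under the plain
# "text-generation" pipeline; load_generation_pipeline below logs the error and
# the benchmark simply skips that model.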
# Define five languages: English, German, Spanish, French, Portuguese.
languages = {
    "en": "English",
    "de": "German",
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese"
}
# Define two cost-effective grammar evaluation models:
grammar_model_names = [
    "vennify/t5-base-grammar-correction",
    "hassaanik/grammar-correction-model"
]
# Determine device: Use GPU (0) if available, otherwise CPU (-1)
device = 0 if torch.cuda.is_available() else -1
# Function to load generation pipelines with appropriate device setting.
def load_generation_pipeline(model_name):
    try:
        return pipeline("text-generation", model=model_name, device=device)
    except Exception as e:
        print(f"Error loading generation model {model_name}: {e}")
        return None
# Function to load grammar evaluation pipelines with appropriate device setting.
def load_grammar_pipeline(model_name):
    try:
        return pipeline("text2text-generation", model=model_name, device=device)
    except Exception as e:
        print(f"Error loading grammar model {model_name}: {e}")
        return None
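# Note: gated checkpoints such as meta-llama/Llama-2-7b-hf additionally require an
# accepted license and a Hugging Face access token (e.g. an HF_TOKEN secret on Spaces);
# without one, loading fails and the model is skipped.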
# Pre-load grammar evaluators.
rater_models = []
for model_name in grammar_model_names:
    p = load_grammar_pipeline(model_name)
    if p is not None:
        rater_models.append(p)
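# If neither grammar model loads, rater_models stays empty and avg_score in
# run_benchmark_all falls back to 0 for every generation.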
def clean_text(text):
    # Fold accents (e.g. "é" -> "e") before filtering, so palindromes in the accented
    # target languages are not broken by dropping those characters outright.
    text = unicodedata.normalize("NFD", text.lower())
    text = "".join(ch for ch in text if not unicodedata.combining(ch))
    return re.sub(r"[^a-z0-9]", "", text)
def is_palindrome(text):
    cleaned = clean_text(text)
    return cleaned == cleaned[::-1]
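# Sanity checks (illustrative, not executed by the app):
#   is_palindrome("A man, a plan, a canal: Panama")  -> True
#   is_palindrome("Ésope reste ici et se repose")    -> True (accents folded by clean_text)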
# Updated prompt instructs the model to output only the palindrome.
def build_prompt(lang):
    return (
        f"Instruction: Generate a single original palindrome in {lang}.\n"
        "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
        "Do not output any additional text or commentary.\n"
        "Palindrome: "
    )
def grammar_prompt(pal, lang):
    return (
        f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
        "Return only a number with no explanation.\n\n"
        f'"{pal}"\n'
    )
def extract_score(text):
    match = re.search(r"\d{1,3}", text)
    if match:
        score = int(match.group())
        return min(max(score, 0), 100)
    return 0
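# e.g. extract_score("Grammar score: 85") -> 85, extract_score("perfect!") -> 0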
# Main benchmark function that runs tests and saves CSV results.
def run_benchmark_all():
    results = []
    for model_name in premium_models:
        gen_pipeline = load_generation_pipeline(model_name)
        if gen_pipeline is None:
            continue
        for code, lang in languages.items():
            prompt = build_prompt(lang)
            try:
                # return_full_text=False keeps the prompt out of the returned string,
                # so the palindrome check runs on the generated text alone.
                gen_output = gen_pipeline(
                    prompt, max_new_tokens=100, do_sample=True, return_full_text=False
                )[0]['generated_text'].strip()
            except Exception as e:
                gen_output = f"Error generating text: {e}"
            valid = is_palindrome(gen_output)
            cleaned_len = len(clean_text(gen_output))
            scores = []
            for rater in rater_models:
                rprompt = grammar_prompt(gen_output, lang)
                try:
                    rtext = rater(rprompt, max_new_tokens=10)[0]['generated_text']
                    scores.append(extract_score(rtext))
                except Exception:
                    scores.append(0)
            avg_score = np.mean(scores) if scores else 0
            # A failed palindrome check halves the grammar-weighted score.
            penalty = (avg_score / 100) if valid else (avg_score / 100) * 0.5
            final_score = round(cleaned_len * penalty, 2)
            results.append({
                "Model": model_name,
                "Language": lang,
                "Palindrome": gen_output,
                "Valid": "✅" if valid else "❌",
                "Length": cleaned_len,
                "Grammar Score": avg_score,
                "Final Score": final_score
            })
    df = pd.DataFrame(results).sort_values(by="Final Score", ascending=False).reset_index(drop=True)
    csv_path = "benchmark_results.csv"
    df.to_csv(csv_path, index=False)
    print(f"CSV saved to {os.path.abspath(csv_path)}")
    # Return the DataFrame value itself (not a fresh component) so Gradio updates the table.
    return df, csv_path
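# Worked scoring example (hypothetical numbers): a valid 21-character palindrome whose
# grammar raters average 80 scores 21 * (80 / 100) = 16.8; if the same text failed the
# palindrome check it would score 21 * 0.8 * 0.5 = 8.4.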
# Build the Gradio UI using Blocks for a canvas layout.
with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
    gr.Markdown("# Premium Model Palindrome Benchmark")
    gr.Markdown(
        "Click **Run All Benchmarks** to evaluate six premium text-generation models "
        "across five languages; results are saved to a CSV file on completion."
    )
    with gr.Row():
        run_button = gr.Button("Run All Benchmarks")
    output_table = gr.Dataframe(label="Benchmark Results")
    output_file = gr.File(label="Download CSV Results")
    run_button.click(fn=run_benchmark_all, inputs=[], outputs=[output_table, output_file])
demo.launch()
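# On Hugging Face Spaces the app is served automatically; run locally, demo.launch()
# starts a server on http://127.0.0.1:7860 by default (pass share=True for a public link).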