Spaces:

PabloTJ
/

palindroms

Running

App Files Files Community

PabloTJ committed 20 days ago

Commit

12a6276

verified ·

1 Parent(s): 2cbbb89

Upload 3 files

Browse files

Files changed (3) hide show

README.md +2 -13
app.py +74 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,14 +1,3 @@
----
-title: Palindroms
-emoji: 🏃
-colorFrom: pink
-colorTo: green
-sdk: gradio
-sdk_version: 5.24.0
-app_file: app.py
-pinned: false
-license: mit
-short_description: LLM benchmark for palidrom generation
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # 🔁 LLM Palindrome Benchmark











2
3	+ This app generates long, original palindromes in 10 languages using Hugging Face LLMs, rates their grammar using other models, and scores them based on correctness and fluency.

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import gradio as gr
+from transformers import pipeline
+import re
+from langdetect import detect
+import numpy as np
+import pandas as pd
# Load the generator and the grammar raters once, at import time, so every
# benchmark run reuses the same pipelines.
gen_model = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")
rater_models = [
    pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta"),
    # FLAN-T5 is an encoder-decoder (seq2seq) model. It cannot be loaded by
    # the causal-LM "text-generation" task, so it needs the
    # "text2text-generation" task. The output keeps the same
    # [{"generated_text": ...}] shape that run_benchmark reads.
    pipeline("text2text-generation", model="google/flan-t5-large"),
]
# Benchmark languages: language code -> English display name used in the
# prompts and in the results table.
languages = {
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "ru": "Russian",
    "ar": "Arabic",
    "hi": "Hindi",
    "ja": "Japanese",
}
def clean_text(text):
    """Return *text* reduced to its case-folded letters and digits.

    Whitespace, punctuation, underscores and other symbols are removed so
    that only the characters relevant to a palindrome comparison remain.

    The filter is Unicode-aware: an ASCII-only character class would delete
    every character of Cyrillic, Arabic, Devanagari or Japanese text and
    leave an empty string. For pure-ASCII input the result is the same as
    lower-casing and keeping ``[a-z0-9]``.

    Args:
        text: Arbitrary text, typically raw model output.

    Returns:
        The case-folded alphanumeric characters of *text*, in order.
    """
    # casefold() is the Unicode-aware form of lower(); str.isalnum() accepts
    # letters and digits from any script and rejects "_" and punctuation.
    return "".join(ch for ch in text.casefold() if ch.isalnum())
def is_palindrome(text):
    """Return True if *text* reads the same in both directions.

    The comparison is done on the output of ``clean_text``, so case,
    spacing and punctuation are ignored.

    Text that contains nothing comparable (it cleans to an empty string) is
    reported as *not* a palindrome. Otherwise an empty or punctuation-only
    model output would be marked as a valid answer.

    Args:
        text: Candidate palindrome.

    Returns:
        bool: True only for non-empty cleaned text equal to its reverse.
    """
    cleaned = clean_text(text)
    return bool(cleaned) and cleaned == cleaned[::-1]
def grammar_prompt(pal, lang):
    """Build the prompt asking a rater model to grade a palindrome's grammar.

    Args:
        pal: The palindrome text to be rated; it is embedded in double quotes.
        lang: Name of the language the palindrome should be written in.

    Returns:
        The prompt string: the instruction, a blank line, the quoted
        palindrome and a trailing newline.
    """
    instruction = (
        f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
        "Only return a number with no explanation:"
    )
    return f'{instruction}\n\n"{pal}"\n'
def extract_score(text):
    """Pull a 0-100 score out of a rater model's reply.

    The first run of one to three digits found in *text* is used and capped
    at 100.

    Args:
        text: Raw text returned by a rater model.

    Returns:
        int: The parsed score, or 0 when *text* contains no digit.
    """
    found = re.search(r"\d{1,3}", text)
    if found is None:
        return 0
    # A run of digits can never be negative, so only the upper bound needs
    # to be enforced.
    return min(int(found.group()), 100)
def _strip_prompt(prompt, output):
    """Return *output* without a leading echo of *prompt*, whitespace-trimmed."""
    if output.startswith(prompt):
        output = output[len(prompt):]
    return output.strip()


def run_benchmark():
    """Generate, validate and score one palindrome per benchmark language.

    For every entry in ``languages`` the generator model is asked for an
    original palindrome. The answer is checked with ``is_palindrome``,
    measured with ``clean_text``, language-detected, and graded for grammar
    by each model in ``rater_models``. The final score is the cleaned length
    multiplied by the average grammar score (as a 0-1 fraction), halved when
    the text is not a valid palindrome.

    Returns:
        A ``gr.Dataframe`` wrapping the per-language results, sorted by
        "Final Score" in descending order.
    """
    # Imported locally: the module-level import only brings in `detect`.
    from langdetect.lang_detect_exception import LangDetectException

    results = []
    for lang in languages.values():
        prompt = (
            f"Write the longest original palindrome you can in {lang}. "
            "It should be creative and not a known palindrome. "
            "If it is not a correct palindrome, you will lose points "
            "according to how correct it is."
        )
        raw_output = gen_model(prompt, max_new_tokens=100, do_sample=True)[0]['generated_text']
        # Text-generation pipelines return the prompt followed by the
        # completion. Drop the echoed prompt so only the model's answer is
        # validated, measured and rated.
        gen_output = _strip_prompt(prompt, raw_output)

        valid = is_palindrome(gen_output)
        cleaned_len = len(clean_text(gen_output))

        # detect() raises when the text has no usable features (for example
        # an empty or punctuation-only answer); one bad answer must not
        # abort the whole benchmark.
        try:
            detected_lang = detect(gen_output)
        except LangDetectException:
            detected_lang = "unknown"

        rprompt = grammar_prompt(gen_output, lang)
        scores = []
        for rater in rater_models:
            rtext = rater(rprompt, max_new_tokens=10)[0]['generated_text']
            # Without stripping the echoed prompt, the first number found
            # would be the "0" in "Rate from 0 to 100", i.e. always 0.
            scores.append(extract_score(_strip_prompt(rprompt, rtext)))

        avg_score = float(np.mean(scores))
        # Invalid palindromes keep only half of their grammar-weighted score.
        penalty = (avg_score / 100) if valid else (avg_score / 100) * 0.5
        final_score = round(cleaned_len * penalty, 2)
        results.append({
            "Language": lang,
            "Palindrome": gen_output,
            "Valid": "✅" if valid else "❌",
            "Length": cleaned_len,
            "Grammar Score": avg_score,
            "Final Score": final_score,
            "Detected Lang": detected_lang,
        })

    df = pd.DataFrame(results).sort_values(by="Final Score", ascending=False).reset_index(drop=True)
    return gr.Dataframe(df)
# Input-less interface: running it executes the full benchmark and shows the
# resulting table.
iface = gr.Interface(
    fn=run_benchmark,
    inputs=[],
    outputs="dataframe",
    title="🔁 LLM Palindrome Benchmark",
)
iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers
+gradio
+langdetect
+pandas
+accelerate