Spaces:

PabloTJ
/

palindroms

Running

App Files Files Community

PabloTJ committed 24 days ago

Commit

fa826ee

verified ·

1 Parent(s): 171b1a5

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -23

app.py CHANGED Viewed

@@ -5,12 +5,16 @@ import numpy as np
 import pandas as pd
 import os
-# Set a seed for reproducibility
 set_seed(42)
-# Define two premium generation models for better quality outputs.
 premium_models = [
-    "mistralai/Mistral-7B-v0.1",
     "HuggingFaceH4/zephyr-7b-beta"
 ]
@@ -23,13 +27,13 @@ languages = {
     "pt": "Portuguese"
 }
-# Define two cost-effective grammar evaluation models.
 grammar_model_names = [
     "vennify/t5-base-grammar-correction",
     "hassaanik/grammar-correction-model"
 ]
-# Functions to load pipelines on demand.
 def load_generation_pipeline(model_name):
     try:
         return pipeline("text-generation", model=model_name)
@@ -37,6 +41,7 @@ def load_generation_pipeline(model_name):
         print(f"Error loading generation model {model_name}: {e}")
         return None
 def load_grammar_pipeline(model_name):
     try:
         return pipeline("text2text-generation", model=model_name)
@@ -44,13 +49,14 @@ def load_grammar_pipeline(model_name):
         print(f"Error loading grammar model {model_name}: {e}")
         return None
-# Pre-load grammar evaluator pipelines.
 rater_models = []
 for model_name in grammar_model_names:
     p = load_grammar_pipeline(model_name)
     # load_grammar_pipeline returns None when a model fails to load;
     # such models are left out of the rater list.
     if p is None:
         continue
     rater_models.append(p)
 def clean_text(text):
     """Return *text* reduced to its case-folded letters and digits.

     Accented letters are folded to their base letter (e.g. "á" -> "a")
     instead of being deleted, so palindromes written in the benchmark's
     non-English languages compare correctly. Whitespace, punctuation and
     underscores are removed. For pure-ASCII input the result matches the
     previous ``[^a-zA-Z0-9]`` regex-based behavior.

     Args:
         text: The raw string to normalize.

     Returns:
         The normalized string; empty if *text* has no letters or digits.
     """
     # Local import keeps this fix self-contained in the function.
     import unicodedata

     # NFKD splits an accented letter into base letter + combining mark;
     # combining marks are not alphanumeric, so the filter drops them.
     decomposed = unicodedata.normalize("NFKD", text.casefold())
     return "".join(ch for ch in decomposed if ch.isalnum())
@@ -58,15 +64,16 @@ def is_palindrome(text):
     cleaned = clean_text(text)
     return cleaned == cleaned[::-1]
-# Updated prompt that instructs the model to output ONLY the palindrome.
 def build_prompt(lang):
     return (
         f"Instruction: Generate a single original palindrome in {lang}.\n"
         "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
-        "Do not output any additional text, commentary, or the prompt itself.\n"
         "Palindrome: "
     )
 def grammar_prompt(pal, lang):
     return (
         f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
@@ -74,6 +81,7 @@ def grammar_prompt(pal, lang):
         f'"{pal}"\n'
     )
 def extract_score(text):
     match = re.search(r"\d{1,3}", text)
     if match:
@@ -81,25 +89,23 @@ def extract_score(text):
         return min(max(score, 0), 100)
     return 0
-# Main benchmark function that runs all tests at once and saves results automatically.
 def run_benchmark_all():
     results = []
     for model_name in premium_models:
         gen_pipeline = load_generation_pipeline(model_name)
         if gen_pipeline is None:
             continue
         for code, lang in languages.items():
             prompt = build_prompt(lang)
             try:
                 gen_output = gen_pipeline(prompt, max_new_tokens=100, do_sample=True)[0]['generated_text'].strip()
             except Exception as e:
                 gen_output = f"Error generating text: {e}"
             valid = is_palindrome(gen_output)
             cleaned_len = len(clean_text(gen_output))
             scores = []
             for rater in rater_models:
                 rprompt = grammar_prompt(gen_output, lang)
@@ -123,28 +129,23 @@ def run_benchmark_all():
                 "Final Score": final_score
             })
-    # Create DataFrame and sort by Final Score.
     df = pd.DataFrame(results).sort_values(by="Final Score", ascending=False).reset_index(drop=True)
-    # Automatically save results to a CSV file.
     csv_path = "benchmark_results.csv"
     df.to_csv(csv_path, index=False)
-    print(f"CSV file saved to {os.path.abspath(csv_path)}")
-    # Return both the DataFrame and the CSV file path for download.
     return gr.Dataframe(df), csv_path
-# Build the Gradio UI using Blocks for a canvas layout.
 with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
     gr.Markdown("# Premium Model Palindrome Benchmark")
-    gr.Markdown("This benchmark runs automatically over 2 premium text-generation models across 5 languages (English, German, Spanish, French, Portuguese) and saves the results to a CSV file when done.")
     with gr.Row():
         run_button = gr.Button("Run All Benchmarks")
     output_table = gr.Dataframe(label="Benchmark Results")
     output_file = gr.File(label="Download CSV Results")
     run_button.click(fn=run_benchmark_all, inputs=[], outputs=[output_table, output_file])
 demo.launch()

 import pandas as pd
 import os
+# Set seed for reproducibility
 set_seed(42)
+# Define the six premium generation models:
 premium_models = [
+    "Qwen/Qwen2.5-Omni-7B",
+    "Qwen/Qwen2.5-VL-7B-Instruct",
+    "deepseek-ai/Janus-Pro-7B",
+    "meta-llama/Llama-2-7b-hf",
+    "Alibaba-NLP/gte-Qwen2-7B-instruct",
     "HuggingFaceH4/zephyr-7b-beta"
 ]
     "pt": "Portuguese"
 }
+# Define two cost-effective grammar evaluation models:
 grammar_model_names = [
     "vennify/t5-base-grammar-correction",
     "hassaanik/grammar-correction-model"
 ]
+# Function to load generation pipelines on demand
 def load_generation_pipeline(model_name):
     try:
         return pipeline("text-generation", model=model_name)
         print(f"Error loading generation model {model_name}: {e}")
         return None
+# Function to load grammar evaluation pipelines on demand
 def load_grammar_pipeline(model_name):
     try:
         return pipeline("text2text-generation", model=model_name)
         print(f"Error loading grammar model {model_name}: {e}")
         return None
+# Pre-load grammar evaluators
 rater_models = []
 for model_name in grammar_model_names:
     p = load_grammar_pipeline(model_name)
     # load_grammar_pipeline returns None when a model fails to load;
     # such models are left out of the rater list.
     if p is None:
         continue
     rater_models.append(p)
+# Utility functions to clean text and check for palindromes
 def clean_text(text):
     """Return *text* reduced to its case-folded letters and digits.

     Accented letters are folded to their base letter (e.g. "á" -> "a")
     instead of being deleted, so palindromes written in the benchmark's
     non-English languages compare correctly. Whitespace, punctuation and
     underscores are removed. For pure-ASCII input the result matches the
     previous ``[^a-zA-Z0-9]`` regex-based behavior.

     Args:
         text: The raw string to normalize.

     Returns:
         The normalized string; empty if *text* has no letters or digits.
     """
     # Local import keeps this fix self-contained in the function.
     import unicodedata

     # NFKD splits an accented letter into base letter + combining mark;
     # combining marks are not alphanumeric, so the filter drops them.
     decomposed = unicodedata.normalize("NFKD", text.casefold())
     return "".join(ch for ch in decomposed if ch.isalnum())
     cleaned = clean_text(text)
     return cleaned == cleaned[::-1]
+# Build prompt with clear instructions to output only the palindrome.
 def build_prompt(lang):
     return (
         f"Instruction: Generate a single original palindrome in {lang}.\n"
         "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
+        "Do not output any additional text or commentary.\n"
         "Palindrome: "
     )
+# Build prompt for grammar evaluation
 def grammar_prompt(pal, lang):
     return (
         f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
         f'"{pal}"\n'
     )
+# Extract numeric score from text output
 def extract_score(text):
     match = re.search(r"\d{1,3}", text)
     if match:
         return min(max(score, 0), 100)
     return 0
+# Main benchmark function - runs all tests and saves CSV automatically.
 def run_benchmark_all():
     results = []
     for model_name in premium_models:
         gen_pipeline = load_generation_pipeline(model_name)
         if gen_pipeline is None:
             continue
         for code, lang in languages.items():
             prompt = build_prompt(lang)
             try:
                 gen_output = gen_pipeline(prompt, max_new_tokens=100, do_sample=True)[0]['generated_text'].strip()
             except Exception as e:
                 gen_output = f"Error generating text: {e}"
             valid = is_palindrome(gen_output)
             cleaned_len = len(clean_text(gen_output))
+            # Evaluate grammar using both grammar models
             scores = []
             for rater in rater_models:
                 rprompt = grammar_prompt(gen_output, lang)
                 "Final Score": final_score
             })
     df = pd.DataFrame(results).sort_values(by="Final Score", ascending=False).reset_index(drop=True)
     csv_path = "benchmark_results.csv"
     df.to_csv(csv_path, index=False)
+    print(f"CSV saved to {os.path.abspath(csv_path)}")
     return gr.Dataframe(df), csv_path
+# Build the Gradio UI using a Blocks layout
 with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
     gr.Markdown("# Premium Model Palindrome Benchmark")
+    gr.Markdown(
+        "This benchmark runs automatically over 6 premium text-generation models across 5 languages "
+        "(English, German, Spanish, French, Portuguese) and saves the results to a CSV file upon completion."
+    )
     with gr.Row():
         run_button = gr.Button("Run All Benchmarks")
     output_table = gr.Dataframe(label="Benchmark Results")
     output_file = gr.File(label="Download CSV Results")
     run_button.click(fn=run_benchmark_all, inputs=[], outputs=[output_table, output_file])
 demo.launch()