katsukiai commited on
Commit
ffd44b8
·
verified ·
1 Parent(s): dddf90e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -31
app.py CHANGED
@@ -11,53 +11,68 @@ from huggingface_hub import HfApi
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
14
- # Load GPT-2 model and tokenizer
15
- MODEL_NAME = "gpt2"
16
- logger.info(f"Loading model: {MODEL_NAME} (CPU mode)")
17
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
18
- tokenizer.pad_token = tokenizer.eos_token # GPT-2 has no padding token, so reuse eos_token
19
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
20
-
21
- # Function to process text with GPT-2
22
- def process_text_with_model(text):
23
- logger.info("Processing text with GPT-2 model (CPU)...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
25
  outputs = model.generate(**inputs, max_length=200)
26
- processed_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
27
- return processed_text
28
 
29
  # Function to convert text to JSON
30
  def text_to_json(text):
31
- lines = text.strip().split("\n")
32
- data = [{"text": line} for line in lines]
33
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
34
  filename = f"output_{timestamp}.json"
35
-
36
  with open(filename, "w") as f:
37
- json.dump(data, f, indent=4)
38
-
39
  logger.info(f"JSON file created: {filename}")
40
  return filename
41
 
42
  # Function to generate JSON and upload to Hugging Face
43
- def generate_and_upload(text):
44
  try:
45
  if not text.strip():
46
  raise ValueError("Text input is empty.")
47
 
48
- logger.info(f"Received text input: {text}")
49
 
50
- # Process text with GPT-2
51
- processed_text = process_text_with_model(text)
52
  logger.info(f"Processed text: {processed_text}")
53
 
54
- # Convert processed text to JSON
55
  json_file = text_to_json(processed_text)
56
 
57
  # Get Hugging Face API token
58
  token = os.getenv("HUGGINGFACE_API_TOKEN")
59
  if not token:
60
- raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")
61
 
62
  # Upload file to Hugging Face
63
  api = HfApi()
@@ -86,16 +101,17 @@ def generate_and_upload(text):
86
  with gr.Blocks() as demo:
87
  with gr.Tab("About"):
88
  gr.Markdown("""
89
- # Text Processor with GPT-2 (CPU)
90
- - Processes text with GPT-2 Transformer
91
- - Converts output to JSON
92
  - Uploads to Hugging Face
93
 
94
  ## Instructions:
95
- 1. Enter text in the "Generate" tab.
96
- 2. Click "Generate and Upload."
97
- 3. Download JSON if needed.
98
- 4. Check upload status.
 
99
 
100
  ## Requirements:
101
  - **Runs on CPU** (No GPU required).
@@ -103,6 +119,7 @@ with gr.Blocks() as demo:
103
  """)
104
 
105
  with gr.Tab("Generate"):
 
106
  text_input = gr.Textbox(label="Enter text")
107
  output_message = gr.Textbox(label="Status message")
108
  json_file_downloader = gr.File(label="Download JSON", interactive=True)
@@ -110,7 +127,7 @@ with gr.Blocks() as demo:
110
 
111
  generate_button.click(
112
  fn=generate_and_upload,
113
- inputs=text_input,
114
  outputs=[output_message, json_file_downloader]
115
  )
116
 
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
  logger = logging.getLogger(__name__)
13
 
14
# Catalogue of popular Hub checkpoints selectable in the UI.
# NOTE(review): the list holds 35 entries, not 37 as the previous comment
# claimed — keep the count out of the comment (use len(MODEL_LIST)) so it
# cannot drift again.
# NOTE(review): "google/flan-t5-large" is an encoder-decoder (T5) checkpoint
# and presumably cannot be loaded with AutoModelForCausalLM — confirm before
# keeping it in this list.
MODEL_LIST = [
    "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl",
    "facebook/opt-1.3b", "facebook/opt-2.7b", "facebook/opt-6.7b",
    "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-Instruct",
    "meta-llama/Llama-2-7b-chat-hf", "meta-llama/Llama-2-13b-chat-hf",
    "microsoft/DialoGPT-small", "microsoft/DialoGPT-medium", "microsoft/DialoGPT-large",
    "bigscience/bloom-560m", "bigscience/bloomz-560m",
    "EleutherAI/gpt-neo-125m", "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-j-6B", "EleutherAI/gpt-neox-20b",
    "huggingfaceh4/starchat-alpha", "huggingfaceh4/zephyr-7b-alpha",
    "deepseek-ai/deepseek-coder-1.3b", "deepseek-ai/deepseek-coder-6.7b",
    "deepseek-ai/deepseek-v3", "databricks/dolly-v2-7b", "cerebras/Cerebras-GPT-1.3B",
    "tiiuae/falcon-7b-instruct", "tiiuae/falcon-40b-instruct",
    "google/gemma-2b", "google/gemma-7b", "google/flan-t5-large",
    "stabilityai/stablelm-tuned-alpha-7b", "stabilityai/stablelm-2-7b-chat"
]
31
+
32
# Cache of already-loaded (tokenizer, model) pairs keyed by model name.
# Reloading a multi-GB checkpoint on every request was the dominant cost
# of each generation; memoising makes repeat requests for a model free.
_model_cache = {}

# Function to load selected model
def load_model(model_name):
    """Return a ``(tokenizer, model)`` pair for *model_name*, loading once.

    The pair is memoised in ``_model_cache`` so subsequent calls for the
    same model reuse the already-loaded objects instead of fetching and
    re-instantiating the checkpoint from the Hub every time.
    """
    if model_name not in _model_cache:
        logger.info(f"Loading model: {model_name} (CPU mode)")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Decoder-only models such as GPT-2 ship without a padding token;
        # reuse EOS so tokenizer(..., padding=True) does not raise.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _model_cache[model_name] = (tokenizer, model)
    return _model_cache[model_name]
39
+
40
# Function to process text with selected model
def process_text(model_name, text, max_length=200):
    """Generate a continuation of *text* with the selected model.

    Parameters:
        model_name: Hub id of the model to use (resolved via ``load_model``).
        text: prompt string to continue.
        max_length: total token budget for generation. Defaults to 200,
            matching the previously hard-coded value, so existing callers
            are unaffected.

    Returns:
        The decoded generation as a plain string (special tokens stripped).
    """
    tokenizer, model = load_model(model_name)
    logger.info(f"Processing text with {model_name}...")
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
47
 
48
# Function to convert text to JSON
def text_to_json(text):
    """Write *text* to a timestamped JSON file and return its filename.

    The file contains a single-element list ``[{"text": <text>}]`` so the
    on-disk schema stays what the uploader expects.

    Returns:
        The generated file name, e.g. ``output_20240101_120000.json``.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"output_{timestamp}.json"
    # Explicit utf-8 keeps non-ASCII model output intact regardless of the
    # platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump([{"text": text}], f, indent=4)
    # Bug fix: the log line printed a scrubbed "(unknown)" placeholder
    # instead of the actual file name.
    logger.info(f"JSON file created: {filename}")
    return filename
56
 
57
  # Function to generate JSON and upload to Hugging Face
58
+ def generate_and_upload(model_name, text):
59
  try:
60
  if not text.strip():
61
  raise ValueError("Text input is empty.")
62
 
63
+ logger.info(f"Received text input for model {model_name}")
64
 
65
+ # Process text
66
+ processed_text = process_text(model_name, text)
67
  logger.info(f"Processed text: {processed_text}")
68
 
69
+ # Convert to JSON
70
  json_file = text_to_json(processed_text)
71
 
72
  # Get Hugging Face API token
73
  token = os.getenv("HUGGINGFACE_API_TOKEN")
74
  if not token:
75
+ raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN.")
76
 
77
  # Upload file to Hugging Face
78
  api = HfApi()
 
101
  with gr.Blocks() as demo:
102
  with gr.Tab("About"):
103
  gr.Markdown("""
104
+ # Text Processor with Selectable Model (CPU)
105
+ - Choose from **37 popular transformer models**
106
+ - Processes text and converts to JSON
107
  - Uploads to Hugging Face
108
 
109
  ## Instructions:
110
+ 1. Select a model from the dropdown.
111
+ 2. Enter text in the "Generate" tab.
112
+ 3. Click "Generate and Upload."
113
+ 4. Download JSON if needed.
114
+ 5. Check upload status.
115
 
116
  ## Requirements:
117
  - **Runs on CPU** (No GPU required).
 
119
  """)
120
 
121
  with gr.Tab("Generate"):
122
+ model_selector = gr.Dropdown(choices=MODEL_LIST, value="gpt2", label="Choose Model")
123
  text_input = gr.Textbox(label="Enter text")
124
  output_message = gr.Textbox(label="Status message")
125
  json_file_downloader = gr.File(label="Download JSON", interactive=True)
 
127
 
128
  generate_button.click(
129
  fn=generate_and_upload,
130
+ inputs=[model_selector, text_input],
131
  outputs=[output_message, json_file_downloader]
132
  )
133