# HuggingFace Spaces app (the hosted Space reported "Runtime error" at export time).
# Standard library
import csv
import logging
import os

# Third-party
import gradio as gr
import nltk
import numpy as np
from huggingface_hub import HfApi, upload_file
from tqdm import tqdm
from transformers import pipeline
# Setup Logging -- log to a file so generate_report() can read it back later.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Download only the NLTK data process_text() actually needs.
# The original used nltk.download('all'), which fetches the entire multi-GB
# corpus collection; nltk.word_tokenize only requires the 'punkt' tokenizer.
nltk.download('punkt')

# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
TOKENIZER = 'bert-base-uncased'
MODELS = ["bert-base-uncased", "gpt2", "roberta-base", "distilbert-base-uncased", "albert-base-v2"]  # Add more models as needed

# Initialize one feature-extraction pipeline per model name.
models = {model: pipeline('feature-extraction', model=model) for model in MODELS}
# Functions | |
def process_text(text):
    """Tokenize *text* and compute one scalar "meaning" per word per model.

    Returns a dict with:
        "tokenizer": the full token list from nltk.word_tokenize
        "words":     the unique tokens (order is set-dependent)
        "meaning":   {word: {model_name: mean activation (float) or None on failure}}
    """
    tokens = nltk.word_tokenize(text)
    words = list(set(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                output = model(word)
                # A 'feature-extraction' pipeline returns nested Python lists
                # (1 x seq_len x hidden_size), not tensors.  The original
                # `output[0].mean().item()` raised AttributeError on every
                # word, so every meaning was logged as an error and stored
                # as None.  Average the activations with numpy instead.
                word_means[model_name] = float(np.mean(output[0]))
            except Exception as e:
                # Best-effort per model: record the failure, keep going.
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}
def save_to_csv(data, filename="output.csv"):
    """Persist the output of process_text() to *filename* as CSV.

    One row per unique word; every row repeats the full token list in the
    'tokenizer' column and stores the per-model means as a stringified dict.
    """
    fieldnames = ["word", "tokenizer", "meanings"]
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        rows = (
            {
                "word": word,
                "tokenizer": data['tokenizer'],
                "meanings": str(data['meaning'][word]),
            }
            for word in data['words']
        )
        writer.writerows(rows)
def train_dataset():
    """Process the placeholder corpus and persist the result to output.csv."""
    text = "Your long text goes here..."
    save_to_csv(process_text(text))
    logging.info("Dataset processed and saved to CSV.")
def generate_report():
    """Return the full contents of the application log file.

    Returns an empty string when 'app.log' does not exist yet — the
    original raised FileNotFoundError if the report tab was opened
    before any logging had occurred.
    """
    try:
        with open('app.log', 'r') as log_file:
            return log_file.read()
    except FileNotFoundError:
        return ""
# Gradio Interface | |
def generate_all(text):
    """Gradio handler: process *text*, write output.csv, report the location."""
    processed = process_text(text)
    save_to_csv(processed)
    return f"Processed data saved to output.csv"
# Gradio UI.  gr.Interface takes a single callable — passing a list to `fn`
# and the nonexistent `tab_titles` argument made the original constructor
# raise at import time.  Build one tab per function with gr.TabbedInterface.
iface = gr.TabbedInterface(
    [
        gr.Interface(
            fn=generate_all,
            inputs="text",
            outputs="text",
            description="Generate processed data.",
        ),
        gr.Interface(
            fn=generate_report,
            inputs=None,
            outputs="text",
            description="View logs.",
        ),
    ],
    tab_names=["Generate All", "Logs"],
    title="DeepFocus-X3",
)
# Run and Push to HuggingFace | |
def run_and_push():
    """Build the dataset CSV and upload it to the HuggingFace Hub repo."""
    train_dataset()
    # Ensure the target repo exists (no-op if it already does), then upload.
    api = HfApi()
    api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
    upload_file(
        path_or_fileobj="output.csv",
        path_in_repo="output.csv",
        repo_id=HF_REPO,
    )
    logging.info("Dataset pushed to HuggingFace.")
if __name__ == "__main__":
    # Push first: iface.launch() blocks until the server shuts down, so in
    # the original ordering run_and_push() was unreachable while serving.
    run_and_push()
    iface.launch()