# DeepFocusTrain / app.py
# (HuggingFace Spaces page header preserved as comments so the file parses)
# katsukiai's picture
# Update app.py
# f4e74d7 verified
# raw
# history blame
# 4.34 kB
import os
import nltk
import csv
import logging
from tqdm import tqdm
import gradio as gr
from transformers import pipeline
from huggingface_hub import HfApi, upload_file, HfFolder
# Setup Logging: everything (including model errors) goes to app.log,
# which generate_report() reads back for the "Logs" tab.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Download only the tokenizer data word_tokenize needs.
# ('all' pulled every NLTK corpus — gigabytes — for no benefit here.)
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
TOKENIZER = 'bert-base-uncased'
MODELS = ["bert-base-uncased", "gpt2", "roberta-base", "distilbert-base-uncased", "albert-base-v2"] # Add more models as needed
# Initialize one feature-extraction pipeline per model, once at import time.
# NOTE: this downloads/loads every model up front, which is slow but keeps
# process_text() simple.
models = {model: pipeline('feature-extraction', model=model) for model in MODELS}
# Functions
def process_text(text):
    """Tokenize *text* and compute a scalar "meaning" per unique word per model.

    Returns a dict with:
      - "tokenizer": the full token list from nltk.word_tokenize,
      - "words": the de-duplicated word list,
      - "meaning": {word: {model_name: mean-activation-or-None}}.

    A model failure for one word is logged and recorded as None rather than
    aborting the whole run.
    """
    tokens = nltk.word_tokenize(text)
    words = list(set(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                output = model(word)
                # The feature-extraction pipeline returns nested Python lists
                # ([batch][token][dim]), which have no .mean() — the original
                # output[0].mean() raised for every word, so every value was
                # silently None. Flatten batch item 0 and average manually.
                flat = [value for token_vec in output[0] for value in token_vec]
                word_means[model_name] = sum(flat) / len(flat) if flat else None
            except Exception as e:
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}
def save_to_csv(data, filename="output.csv"):
    """Write one CSV row per unique word: its text and the stringified
    per-model means dict, under the header ``word,meanings``."""
    rows = (
        {"word": word, "meanings": str(data["meaning"][word])}
        for word in data["words"]
    )
    with open(filename, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=["word", "meanings"])
        writer.writeheader()
        writer.writerows(rows)
def train_dataset(text="Your long text goes here...", filename="output.csv"):
    """Process *text* through every model and save the result as CSV.

    Both arguments default to the original hard-coded values, so existing
    callers (run_and_push) are unaffected; new callers can supply real
    training text and a custom output path.
    """
    data = process_text(text)
    save_to_csv(data, filename)
    logging.info("Dataset processed and saved to CSV.")
def generate_report():
    """Return the full contents of app.log for the Gradio "Logs" tab.

    Returns a placeholder message instead of raising when the log file does
    not exist yet (e.g. first run before any logging happened).
    """
    try:
        with open('app.log', 'r', encoding='utf-8') as log_file:
            return log_file.read()
    except FileNotFoundError:
        return "No log file found."
# Gradio Interface
def generate_all(text):
    """Gradio click handler: run the full pipeline on *text* and persist
    the result to the default output.csv, returning a status string."""
    save_to_csv(process_text(text))
    return "Processed data saved to output.csv"
# Custom CSS for Tailwind CSS
# NOTE(review): this is an HTML <head> fragment, not CSS rules. gr.Blocks(css=...)
# expects plain CSS, so this Tailwind <link> is likely ignored/inert — confirm
# against the Gradio version in use.
custom_css = """
<head>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
</head>
"""
# Two-tab UI: "Generate All" runs the pipeline on user text; "Logs" dumps app.log.
with gr.Blocks(css=custom_css) as iface:
    gr.Markdown("# DeepFocus-X3")
    with gr.Tab("Generate All"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", container=False)
            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", container=False)
        # NOTE(review): gr.Button may not accept container= on all Gradio versions — verify.
        generate_button = gr.Button("Generate", container=False)
        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)
    with gr.Tab("Logs"):
        with gr.Row():
            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", container=False)
        report_button = gr.Button("Report using Logs", container=False)
        # No inputs: generate_report takes no arguments and returns the log text.
        report_button.click(fn=generate_report, outputs=log_output)
# Run and Push to HuggingFace
def run_and_push():
    """Build the dataset CSV and upload it to the HuggingFace Hub repo.

    On upload failure, best-effort: write the error to upload_error.log and
    push that file to a separate errors repo; a failure there is only logged.
    """
    train_dataset()
    # Construct the client outside the try block: the fallback error path
    # below also needs `api`, and previously an HfApi() failure would have
    # made that reference a NameError.
    api = HfApi()
    try:
        api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
        upload_file(
            path_or_fileobj="output.csv",
            path_in_repo="output.csv",
            repo_id=HF_REPO
        )
        logging.info("Dataset pushed to HuggingFace.")
    except Exception as e:
        logging.error(f"Error uploading to HuggingFace: {e}")
        try:
            # Log the error to a separate errors repo (best-effort).
            errors_repo = "katsukiai/errors"
            api.create_repo(repo_id=errors_repo, private=False, exist_ok=True)
            with open('upload_error.log', 'w', encoding='utf-8') as error_file:
                error_file.write(f"Error uploading to HuggingFace: {e}\n")
            upload_file(
                path_or_fileobj="upload_error.log",
                path_in_repo="upload_error.log",
                repo_id=errors_repo
            )
            logging.info("Error log pushed to HuggingFace errors repo.")
        except Exception as e2:
            logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")
if __name__ == "__main__":
    # iface.launch() blocks until the server shuts down, so the dataset
    # build/push must run first — in the original order run_and_push()
    # only ever executed after the Gradio server stopped.
    run_and_push()
    iface.launch()