# DeepFocusTrain / app.py
import csv
import logging
import random
import string

import gradio as gr
import nltk
from huggingface_hub import HfApi, create_repo
from nltk.corpus import wordnet as wn
# Download only the NLTK resources actually used (WordNet for definitions)
# instead of the full 'all' collection, which is large and slow to fetch.
nltk.download('wordnet')
nltk.download('omw-1.4')
# Set up logging to both the console and a file so the "Logs" tab has something to read
logging.basicConfig(
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('dataset_generation.log'),
    ],
)
logger = logging.getLogger(__name__)
# Function to generate random lowercase "words" of length 3-10
def generate_random_words(num_words=100):
    words = []
    for _ in range(num_words):
        word_length = random.randint(3, 10)
        word = ''.join(random.choices(string.ascii_lowercase, k=word_length))
        words.append(word)
    return words
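
# Illustrative output only (values are random, so actual results will differ):
#   generate_random_words(3) -> ['qzk', 'bmorat', 'xwufel']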

# Function to look up the first WordNet definition of each word
def get_word_meanings(words):
    meanings = {}
    for word in words:
        synsets = wn.synsets(word)
        if synsets:
            meanings[word] = synsets[0].definition()
        else:
            meanings[word] = "No definition found."
    return meanings
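
# Note: because the inputs are random letter strings, most of them will not match
# any WordNet lemma, so the bulk of the generated dataset ends up as
# "No definition found."; only occasional short strings hit a real English word.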

# Function to write the {word: meaning} mapping to a CSV file
def convert_to_csv(data, filename='dataset.csv'):
    fieldnames = ['word', 'meaning']
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for word, meaning in data.items():
            writer.writerow({'word': word, 'meaning': meaning})
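
# Resulting CSV layout (rows shown are illustrative, not real output):
#   word,meaning
#   qzkfw,No definition found.
#   cat,"feline mammal usually having thick soft fur ..."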

# Function to create a dataset repo on the Hugging Face Hub and upload the CSV
def create_and_push_dataset(csv_file='dataset.csv', repo_name='DeepFocus-X3'):
    # Create the dataset repository (no-op if it already exists); repo_type must
    # match the repo_type used in the upload call below.
    create_repo(repo_name, repo_type='dataset', exist_ok=True)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=csv_file,
        path_in_repo=csv_file,
        repo_id=repo_name,
        repo_type='dataset'
    )
    logger.info(f"Dataset {repo_name} created and file {csv_file} uploaded.")
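
# Example usage (a sketch only; this upload step is not wired into the Gradio flow
# below, and it assumes a Hugging Face token is available, e.g. via the HF_TOKEN
# environment variable or `huggingface-cli login`, with permission to create the repo):
#   meanings = get_word_meanings(generate_random_words(100))
#   convert_to_csv(meanings, 'dataset.csv')
#   create_and_push_dataset('dataset.csv', repo_name='DeepFocus-X3')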

# Gradio interface functions
def generate_words_interface():
    num_words = random.randint(50, 200)
    words = generate_random_words(num_words)
    meanings = get_word_meanings(words)
    convert_to_csv(meanings)
    logger.info(f"Generated {num_words} random words and saved to dataset.csv.")
    return f"Generated {num_words} random words and saved to dataset.csv."

def about_interface():
    return "This is a dataset generation tool that creates a dataset of random words and their meanings, then uploads it to Hugging Face."

def logs_interface():
    # Read back whatever the file handler configured above has written so far.
    try:
        with open('dataset_generation.log', 'r') as file:
            return file.read()
    except FileNotFoundError:
        return "No logs yet."

# Gradio app setup
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("About"):
            about_text = gr.Markdown(about_interface)
        with gr.Tab("Generate"):
            generate_button = gr.Button("Generate Dataset")
            generate_output = gr.Textbox()
            generate_button.click(generate_words_interface, outputs=generate_output)
        with gr.Tab("Logs"):
            # Pass the function itself (not its result) so the log file is re-read
            # each time the app loads, rather than captured once at build time.
            logs_output = gr.Textbox(value=logs_interface, interactive=False)

# Run the Gradio app
if __name__ == "__main__":
    demo.launch()