import csv
import logging
import random
import string

import gradio as gr
import nltk
from huggingface_hub import HfApi, create_repo
from nltk.corpus import wordnet as wn
# Ensure the NLTK resources this app actually needs are downloaded;
# 'all' is several gigabytes and can stall or time out on startup
nltk.download('wordnet')
nltk.download('omw-1.4')
# Set up logging to a file so the "Logs" tab below has something to read
logging.basicConfig(level=logging.INFO, filename='dataset_generation.log')
logger = logging.getLogger(__name__)

# Function to generate random lowercase "words" (random letter strings of 3-10 characters)
def generate_random_words(num_words=100):
    words = []
    for _ in range(num_words):
        word_length = random.randint(3, 10)
        word = ''.join(random.choices(string.ascii_lowercase, k=word_length))
        words.append(word)
    return words
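# Illustrative output only (values differ on every run because the letters are random):
# >>> generate_random_words(3)
# ['qkdpfx', 'lmnop', 'zbtrkaqe']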

# Function to get meanings of words using NLTK WordNet
def get_word_meanings(words):
    meanings = {}
    for word in words:
        synsets = wn.synsets(word)
        if synsets:
            # Use the definition of the first (most common) synset
            meanings[word] = synsets[0].definition()
        else:
            meanings[word] = "No definition found."
    return meanings
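# Sketch of the expected behaviour (real words resolve, random strings fall back):
# >>> get_word_meanings(["dog", "qzxvb"])
# {'dog': 'a member of the genus Canis ...', 'qzxvb': 'No definition found.'}
# Because the input here is random letter strings, most entries in this app
# will end up with the fallback text.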

# Function to convert data to CSV format
def convert_to_csv(data, filename='dataset.csv'):
    fieldnames = ['word', 'meaning']
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for word, meaning in data.items():
            writer.writerow({'word': word, 'meaning': meaning})
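# The resulting file is a two-column CSV (header row plus one row per word),
# with illustrative values like:
#
# word,meaning
# qzxvb,No definition found.
# dog,a member of the genus Canis ...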

# Function to create a dataset repo on Hugging Face and upload the CSV to it
def create_and_push_dataset(csv_file='dataset.csv', repo_name='DeepFocus-X3'):
    # Create a new dataset repository on Hugging Face (requires an auth token,
    # e.g. an HF_TOKEN secret on Spaces or a prior `huggingface-cli login`)
    repo_url = create_repo(repo_name, repo_type='dataset', exist_ok=True)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=csv_file,
        path_in_repo=csv_file,
        repo_id=repo_url.repo_id,  # full "<namespace>/DeepFocus-X3" id
        repo_type='dataset'
    )
    logger.info(f"Dataset {repo_url.repo_id} created and file {csv_file} uploaded.")
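# Once uploaded, the CSV can be read back with the `datasets` library.
# A minimal sketch, assuming the repo was created under your own namespace
# (replace "your-username" accordingly) and that you are authenticated:
#
# from datasets import load_dataset
# ds = load_dataset("your-username/DeepFocus-X3", data_files="dataset.csv")
# print(ds["train"][0])  # e.g. {'word': 'qzxvb', 'meaning': 'No definition found.'}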

# Gradio interface functions
def generate_words_interface():
    num_words = random.randint(50, 200)
    words = generate_random_words(num_words)
    meanings = get_word_meanings(words)
    convert_to_csv(meanings)
    return f"Generated {num_words} random words and saved to dataset.csv."

def about_interface():
    return "This is a dataset generation tool that creates a dataset of random words and their meanings, then uploads it to Hugging Face."

def logs_interface():
    # Return the log file contents; fall back gracefully if it does not exist yet
    try:
        with open('dataset_generation.log', 'r') as file:
            return file.read()
    except FileNotFoundError:
        return "No logs yet."

# Gradio app setup
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("About"):
            about_text = gr.Markdown(about_interface())
        with gr.Tab("Generate"):
            generate_button = gr.Button("Generate Dataset")
            generate_output = gr.Textbox()
            generate_button.click(generate_words_interface, outputs=generate_output)
        with gr.Tab("Logs"):
            # logs_interface is passed as a callable so the log file is re-read
            # each time the page loads
            logs_output = gr.Textbox(value=logs_interface, interactive=False)

# Run the Gradio app
if __name__ == "__main__":
    demo.launch()
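# Note: create_and_push_dataset() is defined above but never wired into the UI,
# so the generated CSV stays local. A sketch of one extra tab that could be added
# inside the gr.Tabs() block above (hypothetical wiring; assumes an HF token is
# configured, e.g. as a Space secret, so the upload is authorized):
#
# with gr.Tab("Upload"):
#     upload_button = gr.Button("Push dataset to Hugging Face")
#     upload_output = gr.Textbox()
#     upload_button.click(
#         lambda: create_and_push_dataset() or "Uploaded dataset.csv to DeepFocus-X3.",
#         outputs=upload_output,
#     )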