katsukiai committed · verified
Commit 6252133 · 1 Parent(s): 5545c32

Update app.py

Files changed (1)
  1. app.py +77 -156
app.py CHANGED
@@ -1,173 +1,94 @@
 import os
-import nltk
 import csv
 import logging
-from tqdm import tqdm
 import gradio as gr
-from transformers import pipeline
-from huggingface_hub import HfApi, upload_file, HfFolder
-import time
-
-# Setup Logging
-logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# Download All NLTK Data
-nltk.download('all')
-
-# Constants
-HF_REPO = "katsukiai/DeepFocus-X3"
-TOKENIZER = 'bert-base-uncased'
-MODELS = ["bert-base-uncased", "gpt2", "roberta-base", "distilbert-base-uncased", "albert-base-v2"]  # Corrected model identifier
-
-# Initialize Models
-try:
-    models = {model: pipeline('feature-extraction', model=model) for model in MODELS}
-except Exception as e:
-    logging.error(f"Error initializing models: {e}")
-    models = {}
-
-# Functions
-def process_text(text):
-    tokens = nltk.word_tokenize(text)
-    words = list(set(tokens))
-    means = {}
-    for word in tqdm(words, desc="Processing Words"):
-        word_means = {}
-        for model_name, model in models.items():
-            try:
-                output = model(word)
-                word_means[model_name] = output[0].mean().item()
-            except Exception as e:
-                logging.error(f"Error processing word {word} with model {model_name}: {e}")
-                word_means[model_name] = None
-        means[word] = word_means
-    return {"tokenizer": tokens, "words": words, "meaning": means}
-
-def save_to_csv(data, filename="output.csv"):
-    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
-        writer = csv.DictWriter(csvfile, fieldnames=["word", "meanings"])
         writer.writeheader()
-        for word in data['words']:
-            writer.writerow({
-                "word": word,
-                "meanings": str(data['meaning'][word])
-            })
-
-def train_dataset():
-    text = "Your long text goes here..."
-    data = process_text(text)
-    save_to_csv(data)
-    logging.info("Dataset processed and saved to CSV.")
-
-def generate_report():
-    with open('app.log', 'r') as log_file:
-        log_content = log_file.read()
-    return log_content
-
-def get_uptime():
-    uptime = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
-    return f"Uptime: {uptime}"
-
-# Gradio Interface
-def generate_all(text):
-    data = process_text(text)
-    save_to_csv(data)
-    return "Processed data saved to output.csv"
-
-# Custom CSS for Tailwind CSS
-custom_css = """
-<style>
-@import url('https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css');
-body {
-    font-family: 'Arial', sans-serif;
-}
-#title {
-    text-align: center;
-    margin-bottom: 20px;
-}
-#input_text, #output_text, #log_output, #commit_input, #username_input, #metadata_input, #uptime_text {
-    width: 100%;
-    max-width: 600px;
-    margin: 10px 0;
-}
-#generate_button, #report_button, #save_settings_button {
-    width: 100%;
-    max-width: 200px;
-    margin: 10px 0;
-}
-#settings_container {
-    margin-top: 20px;
-}
-</style>
-"""
-
-start_time = time.time()
-
-with gr.Blocks(css=custom_css) as iface:
-    gr.Markdown("# DeepFocus-X3", elem_id="title")
-
-    with gr.Tab("Generate All"):
-        with gr.Row():
-            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", elem_id="input_text")
-            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", elem_id="output_text")
-            generate_button = gr.Button("Generate", elem_id="generate_button")
-        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)
-
-    with gr.Tab("Logs"):
-        with gr.Row():
-            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", elem_id="log_output")
-            report_button = gr.Button("Report using Logs", elem_id="report_button")
-        report_button.click(fn=generate_report, outputs=log_output)
-
-    with gr.Tab("Settings"):
-        with gr.Row():
-            commit_input = gr.Textbox(label="Commit", placeholder="Enter commit message", elem_id="commit_input")
-            username_input = gr.Textbox(label="Username", placeholder="Enter your username", elem_id="username_input")
-            metadata_input = gr.Textbox(label="Metadata", placeholder="Enter metadata", elem_id="metadata_input")
-            uptime_text = gr.Textbox(label="Uptime", placeholder="Uptime will appear here...", elem_id="uptime_text", interactive=False)
-            save_settings_button = gr.Button("Save Settings", elem_id="save_settings_button")
-
-        save_settings_button.click(
-            fn=lambda commit, username, metadata: f"Settings saved: {commit}, {username}, {metadata}",
-            inputs=[commit_input, username_input, metadata_input],
-            outputs=[uptime_text]  # Reusing uptime_text for output to show settings saved message
-        )
-
-    # Update uptime every 10 seconds
-    def update_uptime():
-        return get_uptime()
-
-    gr.Every(10, fn=update_uptime, outputs=uptime_text)
-
-# Run and Push to HuggingFace
-def run_and_push():
-    train_dataset()
-    try:
-        api = HfApi()
-        api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
-        upload_file(
-            path_or_fileobj="output.csv",
-            path_in_repo="output.csv",
-            repo_id=HF_REPO
-        )
-        logging.info("Dataset pushed to HuggingFace.")
-    except Exception as e:
-        logging.error(f"Error uploading to HuggingFace: {e}")
-        try:
-            # Log the error to a separate errors repo
-            errors_repo = "katsukiai/errors"
-            api.create_repo(repo_id=errors_repo, private=False, exist_ok=True)
-            with open('upload_error.log', 'w') as error_file:
-                error_file.write(f"Error uploading to HuggingFace: {e}\n")
-            upload_file(
                path_or_fileobj="upload_error.log",
-                path_in_repo="upload_error.log",
-                repo_id=errors_repo
-            )
-            logging.info("Error log pushed to HuggingFace errors repo.")
-        except Exception as e2:
-            logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")
-
 if __name__ == "__main__":
-    iface.launch()
-    run_and_push()
 import os
 import csv
 import logging
 import gradio as gr
+import nltk
+from datasets import Dataset, DatasetDict, DatasetInfo, Features, Value, ClassLabel
+from huggingface_hub import HfApi, Repository, create_repo
+from tqdm import tqdm
+from nltk.tokenize import word_tokenize
+from nltk.corpus import wordnet as wn
+import random
+import string
+
+# Ensure necessary NLTK resources are downloaded
+nltk.download('punkt')
+nltk.download('wordnet')
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Function to generate random words
+def generate_random_words(num_words=100):
+    words = []
+    for _ in range(num_words):
+        word_length = random.randint(3, 10)
+        word = ''.join(random.choices(string.ascii_lowercase, k=word_length))
+        words.append(word)
+    return words
+
+# Function to get meanings of words using NLTK WordNet
+def get_word_meanings(words):
+    meanings = {}
+    for word in words:
+        synsets = wn.synsets(word)
+        if synsets:
+            meanings[word] = synsets[0].definition()
+        else:
+            meanings[word] = "No definition found."
+    return meanings
+
+# Function to convert data to CSV format
+def convert_to_csv(data, filename='dataset.csv'):
+    fieldnames = ['word', 'meaning']
+    with open(filename, mode='w', newline='', encoding='utf-8') as file:
+        writer = csv.DictWriter(file, fieldnames=fieldnames)
         writer.writeheader()
+        for word, meaning in data.items():
+            writer.writerow({'word': word, 'meaning': meaning})
+
+# Function to create and push dataset to Hugging Face
+def create_and_push_dataset(csv_file='dataset.csv', repo_name='DeepFocus-X3'):
+    # Create a new dataset repository on Hugging Face
+    create_repo(repo_name, exist_ok=True)
+    api = HfApi()
+    api.upload_file(
+        path_or_fileobj=csv_file,
+        path_in_repo=csv_file,
+        repo_id=repo_name,
+        repo_type='dataset'
+    )
+    logger.info(f"Dataset {repo_name} created and file {csv_file} uploaded.")
+
+# Gradio interface functions
+def generate_words_interface():
+    num_words = random.randint(50, 200)
+    words = generate_random_words(num_words)
+    meanings = get_word_meanings(words)
+    convert_to_csv(meanings)
+    return f"Generated {num_words} random words and saved to dataset.csv."
+
+def about_interface():
+    return "This is a dataset generation tool that creates a dataset of random words and their meanings, then uploads it to Hugging Face."
+
+def logs_interface():
+    with open('dataset_generation.log', 'r') as file:
+        logs = file.read()
+    return logs
+
+# Gradio app setup
+with gr.Blocks() as demo:
+    with gr.Tabs():
+        with gr.Tab("About"):
+            about_text = gr.Markdown(about_interface)
+        with gr.Tab("Generate"):
+            generate_button = gr.Button("Generate Dataset")
+            generate_output = gr.Textbox()
+            generate_button.click(generate_words_interface, outputs=generate_output)
+        with gr.Tab("Logs"):
+            logs_output = gr.Textbox(value=logs_interface(), interactive=False)
+
+# Run the Gradio app
 if __name__ == "__main__":
+    demo.launch()
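
Two details of the added code are easy to trip over. create_repo(repo_name, exist_ok=True) defaults to repo_type="model", while the upload below it passes repo_type='dataset', so the CSV is pushed at a dataset repo that this call never creates. Separately, logs_interface() reads dataset_generation.log, but logging.basicConfig(level=logging.INFO) is given no filename, so nothing writes that file and the Logs tab fails with FileNotFoundError at startup. A minimal sketch of a reconciled version, assuming a Hugging Face token is already configured (e.g. via huggingface-cli login) and reusing the namespaced repo id katsukiai/DeepFocus-X3 from the old file's HF_REPO constant:

import logging
from huggingface_hub import HfApi, create_repo

# Assumption: dataset_generation.log is the intended log sink, since logs_interface() reads it.
logging.basicConfig(filename='dataset_generation.log', level=logging.INFO)
logger = logging.getLogger(__name__)

def create_and_push_dataset(csv_file='dataset.csv', repo_name='katsukiai/DeepFocus-X3'):
    # Create the repo as a dataset repo; without repo_type='dataset',
    # create_repo would make a model repo and the upload would target a missing repo.
    create_repo(repo_name, repo_type='dataset', exist_ok=True)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=csv_file,
        path_in_repo=csv_file,
        repo_id=repo_name,
        repo_type='dataset',
    )
    logger.info(f"Dataset {repo_name} created and file {csv_file} uploaded.")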
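
The datasets imports (Dataset, DatasetDict, DatasetInfo, Features, Value, ClassLabel) are never used in the new file, and neither are Repository, word_tokenize, or tqdm. If the intent is a browsable dataset rather than a raw CSV sitting in the repo, one alternative, shown here as a sketch rather than what the commit does, is to build a datasets.Dataset from the word/meaning pairs and push it directly; Dataset.push_to_hub creates the dataset repo on demand. The meanings argument is assumed to be the dict returned by get_word_meanings, and push_as_dataset is a hypothetical helper name:

from datasets import Dataset

def push_as_dataset(meanings, repo_name='katsukiai/DeepFocus-X3'):
    # Build a two-column dataset from the {word: meaning} dict.
    ds = Dataset.from_dict({
        'word': list(meanings.keys()),
        'meaning': list(meanings.values()),
    })
    # Push to the Hub as a dataset repo, creating it if it does not exist.
    ds.push_to_hub(repo_name)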