import csv
import logging

import nltk
import numpy as np
from tqdm import tqdm
import gradio as gr
from transformers import pipeline
from huggingface_hub import HfApi, upload_file

# Setup Logging
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Download only the tokenizer data that nltk.word_tokenize actually needs
nltk.download('punkt')
nltk.download('punkt_tab')  # required by newer NLTK releases; harmless on older ones

# Constants
HF_REPO = "katsukiai/DeepFocus-X3"
MODELS = ["bert-base-uncased", "gpt2", "roberta-base", "distilbert-base-uncased", "albert-base-v2"]  # Add more models as needed

# Initialize Models
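# Note: this keeps every checkpoint resident in memory at once, which can
# take several GB of RAM for the five models above; trim MODELS if needed.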
models = {model: pipeline('feature-extraction', model=model) for model in MODELS}

# Functions
def process_text(text):
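    """Tokenize the input text and compute, for every unique word, the mean
    of its feature-extraction embedding under each model in MODELS.

    Returns a dict with keys "tokenizer" (all tokens), "words" (unique
    tokens), and "meaning" (word -> {model_name: mean value, or None on failure}).
    """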
    tokens = nltk.word_tokenize(text)
    words = list(set(tokens))
    means = {}
    for word in tqdm(words, desc="Processing Words"):
        word_means = {}
        for model_name, model in models.items():
            try:
                output = model(word)
                # feature-extraction returns nested lists of per-token
                # embeddings, not a tensor; reduce them with numpy
                word_means[model_name] = float(np.mean(output[0]))
            except Exception as e:
                logging.error(f"Error processing word {word} with model {model_name}: {e}")
                word_means[model_name] = None
        means[word] = word_means
    return {"tokenizer": tokens, "words": words, "meaning": means}

def save_to_csv(data, filename="output.csv"):
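    """Write one CSV row per word: the word plus the string form of its
    per-model means dict (recoverable with ast.literal_eval)."""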
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["word", "meanings"])
        writer.writeheader()
        for word in data['words']:
            writer.writerow({
                "word": word,
                "meanings": str(data['meaning'][word])
            })

def train_dataset():
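    """Process a sample text and persist the result to CSV; the text below
    is a placeholder to be replaced with a real corpus."""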
    text = "Your long text goes here..."
    data = process_text(text)
    save_to_csv(data)
    logging.info("Dataset processed and saved to CSV.")

def generate_report():
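    """Return the full contents of app.log for display in the UI."""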
    with open('app.log', 'r') as log_file:
        log_content = log_file.read()
    return log_content

# Gradio Interface
def generate_all(text):
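    """Gradio handler: process the input text, write output.csv, and return
    a status message."""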
    data = process_text(text)
    save_to_csv(data)
    return "Processed data saved to output.csv"

# Tailwind CSS: Gradio's `css` argument expects raw CSS, not HTML, so the
# stylesheet link is injected via the `head` parameter instead (Gradio 4+)
custom_head = """
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
"""

with gr.Blocks(head=custom_head) as iface:
    gr.Markdown("# DeepFocus-X3")
    with gr.Tab("Generate All"):
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", container=False)
            output_text = gr.Textbox(label="Output", placeholder="Output will appear here...", container=False)
            generate_button = gr.Button("Generate")  # gr.Button does not accept `container`
        generate_button.click(fn=generate_all, inputs=input_text, outputs=output_text)
        
    with gr.Tab("Logs"):
        with gr.Row():
            log_output = gr.Textbox(label="Logs", placeholder="Logs will appear here...", container=False)
            report_button = gr.Button("Report using Logs")
        report_button.click(fn=generate_report, outputs=log_output)

# Run and Push to HuggingFace
def run_and_push():
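    """Build the dataset and upload output.csv to HF_REPO; on failure, push
    an error log to a separate errors repo instead."""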
    train_dataset()
    api = HfApi()  # constructed outside the try block so the error handler can reuse it
    try:
        api.create_repo(repo_id=HF_REPO, private=False, exist_ok=True)
        upload_file(
            path_or_fileobj="output.csv",
            path_in_repo="output.csv",
            repo_id=HF_REPO
        )
        logging.info("Dataset pushed to HuggingFace.")
    except Exception as e:
        logging.error(f"Error uploading to HuggingFace: {e}")
        try:
            # Log the error to a separate errors repo
            errors_repo = "katsukiai/errors"
            api.create_repo(repo_id=errors_repo, private=False, exist_ok=True)
            with open('upload_error.log', 'w') as error_file:
                error_file.write(f"Error uploading to HuggingFace: {e}\n")
            upload_file(
                path_or_fileobj="upload_error.log",
                path_in_repo="upload_error.log",
                repo_id=errors_repo
            )
            logging.info("Error log pushed to HuggingFace errors repo.")
        except Exception as e2:
            logging.error(f"Failed to log error to HuggingFace errors repo: {e2}")

if __name__ == "__main__":
    # launch() blocks until the server shuts down, so run the dataset build
    # and push first (or pass prevent_thread_lock=True to launch instead)
    run_and_push()
    iface.launch()