Spaces:

universeofml
/

DeepFocusTrain

Runtime error

App Files Files Community

katsukiai committed on Mar 5

Commit

0506cec

verified ·

1 Parent(s): ba51acd

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -80

app.py CHANGED Viewed

@@ -1,95 +1,97 @@
-import os
 import json
 import logging
-import nltk
-from tqdm import tqdm
 import gradio as gr
-from transformers import pipeline
-# Setup logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger()
-# Download NLTK data
-nltk.download('punkt')
-nltk.download('averaged_perceptron_tagger')
-nltk.download('wordnet')
-# Initialize DeepSeek AI pipeline for long text processing
-deepseek_pipeline = pipeline('text-generation', model='DeepSeekAI/DeepFocus-X3')
-# Function to process text and convert to JSONL
-def text_to_jsonl(text):
-    sentences = nltk.sent_tokenize(text)
-    jsonl_data = []
-    for sentence in tqdm(sentences, desc="Processing sentences"):
-        words = nltk.word_tokenize(sentence)
-        pos_tags = nltk.pos_tag(words)
-        meanings = [nltk.corpus.wordnet.synsets(word)[0].definition() if nltk.corpus.wordnet.synsets(word) else 'No definition' for word in words]
-        jsonl_data.append({
-            "tokenizer": pos_tags,
-            "words": words,
-            "meaning": meanings
-        })
-    return jsonl_data
-# Function to push data to Hugging Face dataset repository
-def push_to_huggingface(jsonl_data, repo_name='katsukiai/DeepFocus-X3'):
-    import huggingface_hub
-    from huggingface_hub import HfApi, DatasetCard
-    api = HfApi()
-    repo_id = f"{repo_name}"
-    # Create or get the dataset repository
-    api.create_repo(repo_id, repo_type="dataset", private=False, exist_ok=True)
-    # Write JSONL data to a file
-    jsonl_file_path = "data.jsonl"
-    with open(jsonl_file_path, "w") as f:
-        for item in jsonl_data:
-            f.write(json.dumps(item) + "\n")
-    # Upload the file to the repository
-    api.upload_file(
-        path_or_fileobj=jsonl_file_path,
-        path_in_repo="data.jsonl",
-        repo_id=repo_id,
-        repo_type="dataset"
-    )
-    logger.info(f"Data pushed to {repo_id}")
-# Gradio interface
-def generate_jsonl(text):
-    jsonl_data = text_to_jsonl(text)
-    push_to_huggingface(jsonl_data)
-    return "Data processed and pushed to Hugging Face"
-# Define Gradio interface
-def gradio_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("# Text to JSONL Converter and Hugging Face Pusher")
-        with gr.Tab("About"):
-            gr.Markdown("""
-                ## About
-                This tool converts text into JSONL format with detailed information about each word, including its tokenizer and meaning.
-                It then pushes the processed data to a Hugging Face dataset repository.
-            """)
-        with gr.Tab("Generate"):
-            with gr.Row():
-                input_text = gr.Textbox(label="Input Text", lines=5)
-                output_text = gr.Textbox(label="Output Status", lines=1)
-            generate_button = gr.Button("Generate and Push")
-            generate_button.click(fn=generate_jsonl, inputs=input_text, outputs=output_text)
-    demo.launch()
-# Run the Gradio interface
-if __name__ == "__main__":
-    gradio_interface()

 import json
 import logging
+import os
+import datetime
 import gradio as gr
+from huggingface_hub import HfApi, HfFolder
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Define the function to convert text to JSON
+def text_to_json(text):
+    """Write *text* to a timestamped JSON file and return the file's path.
+
+    The stripped text is split on newlines and each line becomes one
+    ``{"text": line}`` record in a JSON array.
+
+    Args:
+        text: Raw input text; leading/trailing whitespace is stripped
+            before splitting.
+
+    Returns:
+        Relative path of the written file, of the form
+        ``converted/output_<YYYYmmdd_HHMMSS>.json``.
+    """
+    lines = text.strip().split('\n')
+    data = [{"text": line} for line in lines]
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"converted/output_{timestamp}.json"
+    # open() does not create missing parent directories; without this the
+    # call fails with FileNotFoundError whenever "converted/" is absent.
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=4)
+    return filename
+# Define the function to generate and upload the JSON file
+def generate_and_upload(text):
+    """Convert *text* to a JSON file and upload it to the dataset repository.
+
+    Used as the Gradio click handler, so it never raises: every failure is
+    logged and reported through the returned status message instead.
+
+    Args:
+        text: Text entered by the user.
+
+    Returns:
+        A ``(message, path)`` tuple. On success, ``message`` names the
+        uploaded file and ``path`` is the local JSON file. On failure,
+        ``message`` is ``"Error: <reason>"`` and ``path`` is ``None``.
+    """
+    try:
+        # Reject whitespace-only input too; it would otherwise be uploaded
+        # as a single empty record.
+        if not text or not text.strip():
+            raise ValueError("Text input is empty.")
+        # Log only the size: the input is user-supplied and may be large.
+        logger.info("Received text input (%d characters)", len(text))
+        # Check the token before writing anything, so a missing token does
+        # not leave an orphan file behind. .get() is required here:
+        # os.environ[...] raises KeyError for an unset variable, which made
+        # the explanatory message below unreachable.
+        token = os.environ.get('HUGGINGFACE_API_TOKEN')
+        if not token:
+            raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")
+        # Convert text to JSON and save to file
+        json_file = text_to_json(text)
+        logger.info(f"JSON file created: {json_file}")
+        # Upload the file to the dataset repository
+        api = HfApi()
+        repo_id = "katsukiai/DeepFocus-X3"
+        upload_info = api.upload_file(
+            path_or_fileobj=json_file,
+            path_in_repo=os.path.basename(json_file),
+            repo_id=repo_id,
+            repo_type="dataset",
+            token=token
+        )
+        logger.info(f"Upload info: {upload_info}")
+        message = f"Upload successful! Filename: {os.path.basename(json_file)}"
+        return message, json_file
+    except Exception as e:
+        # Top-level UI boundary: keep the traceback in the log and show the
+        # user a readable message rather than crashing the handler.
+        logger.exception(f"Error uploading file: {e}")
+        return f"Error: {e}", None
+# Create the Gradio interface: an "About" tab with usage instructions and a
+# "Generate" tab whose button calls generate_and_upload.
+with gr.Blocks() as demo:
+    with gr.Tab("About"):
+        gr.Markdown("""
+        # Text to JSON uploader
+        This app allows you to input text, convert it to JSON format, and upload it to the Hugging Face dataset repository.
+        ## Instructions
+        1. Enter your text in the "Generate" tab.
+        2. Click the "Generate and Upload" button.
+        3. Download the JSON file if desired.
+        4. Check the message for upload status.
+        ## Requirements
+        - Hugging Face API token set as environment variable `HUGGINGFACE_API_TOKEN`.
+        ## Obtaining Hugging Face API Token
+        1. Log in to your Hugging Face account.
+        2. Go to your profile settings.
+        3. Generate a new token or use an existing one.
+        4. Set the token as an environment variable named `HUGGINGFACE_API_TOKEN`.
+        ## Setting Environment Variable
+        - **Windows**: Set it in System Properties > Advanced > Environment Variables.
+        - **macOS/Linux**: Add `export HUGGINGFACE_API_TOKEN=your_token` to your shell profile (e.g., `.bashrc`, `.zshrc`).
+        """)
+    with gr.Tab("Generate"):
+        text_input = gr.Textbox(label="Enter text")
+        output_message = gr.Textbox(label="Status message")
+        json_file_downloader = gr.File(label="Download JSON", interactive=False)
+        generate_button = gr.Button("Generate and Upload")
+        # The two outputs are filled, in order, from the (message, file path)
+        # pair returned by generate_and_upload; on failure the path is None.
+        generate_button.click(fn=generate_and_upload, inputs=text_input, outputs=[output_message, json_file_downloader])
+# Launch the Gradio app
+# NOTE(review): this runs at module import time (there is no __main__ guard),
+# so importing app.py starts the server - confirm that is intended.
+demo.launch()