Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,95 +1,97 @@
|
|
1 |
-
import os
|
2 |
import json
|
3 |
import logging
|
4 |
-
import
|
5 |
-
|
6 |
import gradio as gr
|
7 |
-
from
|
8 |
-
|
9 |
-
|
10 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
11 |
-
logger = logging.getLogger()
|
12 |
-
|
13 |
-
# Download NLTK data
|
14 |
-
nltk.download('punkt')
|
15 |
-
nltk.download('averaged_perceptron_tagger')
|
16 |
-
nltk.download('wordnet')
|
17 |
|
18 |
-
|
19 |
-
deepseek_pipeline = pipeline('text-generation', model='DeepSeekAI/DeepFocus-X3')
|
20 |
|
21 |
-
#
|
22 |
-
def
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
})
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
# Write JSONL data to a file
|
51 |
-
jsonl_file_path = "data.jsonl"
|
52 |
-
with open(jsonl_file_path, "w") as f:
|
53 |
-
for item in jsonl_data:
|
54 |
-
f.write(json.dumps(item) + "\n")
|
55 |
|
56 |
-
# Upload the file to the repository
|
57 |
-
api.upload_file(
|
58 |
-
path_or_fileobj=jsonl_file_path,
|
59 |
-
path_in_repo="data.jsonl",
|
60 |
-
repo_id=repo_id,
|
61 |
-
repo_type="dataset"
|
62 |
-
)
|
63 |
-
logger.info(f"Data pushed to {repo_id}")
|
64 |
|
65 |
-
# Gradio interface
|
66 |
-
def generate_jsonl(text):
|
67 |
-
jsonl_data = text_to_jsonl(text)
|
68 |
-
push_to_huggingface(jsonl_data)
|
69 |
-
return "Data processed and pushed to Hugging Face"
|
70 |
|
71 |
-
#
|
72 |
-
|
73 |
-
with gr.
|
74 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
## About
|
79 |
-
This tool converts text into JSONL format with detailed information about each word, including its tokenizer and meaning.
|
80 |
-
It then pushes the processed data to a Hugging Face dataset repository.
|
81 |
-
""")
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
-
demo.launch()
|
92 |
|
93 |
-
# Run the Gradio interface
|
94 |
-
if __name__ == "__main__":
|
95 |
-
gradio_interface()
|
|
|
|
|
1 |
import json
|
2 |
import logging
|
3 |
+
import os
|
4 |
+
import datetime
|
5 |
import gradio as gr
|
6 |
+
from huggingface_hub import HfApi, HfFolder
|
7 |
+
# Set up logging
|
8 |
+
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
logger = logging.getLogger(__name__)
|
|
|
11 |
|
12 |
+
# Define the function to convert text to JSON
|
13 |
+
def text_to_json(text):
    """Write *text* to a timestamped JSON file, one record per line.

    The input is stripped of leading/trailing whitespace and split on
    newline characters; each resulting line becomes a ``{"text": line}``
    object in a JSON array.

    Args:
        text: The raw text to convert.

    Returns:
        The relative path of the file that was written, of the form
        ``converted/output_YYYYmmdd_HHMMSS.json``.
    """
    lines = text.strip().split('\n')
    data = [{"text": line} for line in lines]
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"converted/output_{timestamp}.json"
    # The output directory is not guaranteed to exist (e.g. on a fresh
    # checkout or container); without this, open() raises FileNotFoundError.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as f:
        json.dump(data, f, indent=4)
    return filename
|
21 |
|
22 |
+
# Define the function to generate and upload the JSON file
|
23 |
+
def generate_and_upload(text):
    """Convert *text* to a JSON file and upload it to the Hugging Face Hub.

    The text is written to a local JSON file via ``text_to_json`` and that
    file is uploaded to the ``katsukiai/DeepFocus-X3`` dataset repository
    using the token in the ``HUGGINGFACE_API_TOKEN`` environment variable.

    Args:
        text: The raw text entered by the user.

    Returns:
        A ``(message, json_file)`` tuple. On success ``message`` reports the
        uploaded filename and ``json_file`` is the local path of the JSON
        file. On any failure ``message`` is ``"Error: ..."`` and
        ``json_file`` is ``None``; exceptions are not propagated.
    """
    try:
        # Whitespace-only input would otherwise upload a single empty record.
        if not text or not text.strip():
            raise ValueError("Text input is empty.")

        logger.info(f"Received text input: {text}")

        # Convert text to JSON and save to file
        json_file = text_to_json(text)
        logger.info(f"JSON file created: {json_file}")

        # Authenticate with Hugging Face Hub
        api = HfApi()
        # Use .get() so a missing variable reaches the explicit check below
        # instead of surfacing as an opaque KeyError message.
        token = os.environ.get('HUGGINGFACE_API_TOKEN')
        if not token:
            raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")

        # Upload the file to the dataset repository
        repo_id = "katsukiai/DeepFocus-X3"
        upload_info = api.upload_file(
            path_or_fileobj=json_file,
            path_in_repo=os.path.basename(json_file),
            repo_id=repo_id,
            repo_type="dataset",
            token=token
        )
        logger.info(f"Upload info: {upload_info}")
        message = f"Upload successful! Filename: {os.path.basename(json_file)}"
        return message, json_file
    except Exception as e:
        # UI boundary: report the failure to the user rather than crash the
        # app, but keep the traceback in the log for diagnosis.
        logger.exception(f"Error uploading file: {e}")
        return f"Error: {e}", None
|
55 |
|
|
|
|
|
|
|
|
|
|
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
# Create the Gradio interface
|
60 |
+
# Two-tab UI: a static "About" page and the "Generate" page that drives
# generate_and_upload.
with gr.Blocks() as demo:
    with gr.Tab("About"):
        # NOTE(review): the Markdown text below is kept flush-left as it
        # appears in the reviewed source; confirm against the original
        # file's indentation.
        gr.Markdown("""
# Text to JSON uploader
This app allows you to input text, convert it to JSON format, and upload it to the Hugging Face dataset repository.

## Instructions
1. Enter your text in the "Generate" tab.
2. Click the "Generate and Upload" button.
3. Download the JSON file if desired.
4. Check the message for upload status.

## Requirements
- Hugging Face API token set as environment variable `HUGGINGFACE_API_TOKEN`.

## Obtaining Hugging Face API Token
1. Log in to your Hugging Face account.
2. Go to your profile settings.
3. Generate a new token or use an existing one.
4. Set the token as an environment variable named `HUGGINGFACE_API_TOKEN`.

## Setting Environment Variable
- **Windows**: Set it in System Properties > Advanced > Environment Variables.
- **macOS/Linux**: Add `export HUGGINGFACE_API_TOKEN=your_token` to your shell profile (e.g., `.bashrc`, `.zshrc`).
""")

    with gr.Tab("Generate"):
        text_input = gr.Textbox(label="Enter text")
        output_message = gr.Textbox(label="Status message")
        json_file_downloader = gr.File(label="Download JSON", interactive=False)
        generate_button = gr.Button("Generate and Upload")
        # generate_and_upload returns (message, json_file); the two values
        # are bound to the outputs below in that order.
        generate_button.click(fn=generate_and_upload, inputs=text_input, outputs=[output_message, json_file_downloader])

# Launch the Gradio app
demo.launch()
|
95 |
+
|
96 |
|
|
|
97 |
|
|
|
|
|
|