Spaces:
Runtime error
Runtime error
import spaces | |
import gradio as gr | |
from wtpsplit import SaT | |
import json | |
# Initialize the SaT model once at import time so every request reuses it.
# torch is imported here only to probe for a GPU; `.half()`/`.to()` below are
# the PyTorch-style calls the model object already exposes.
import torch

sat = SaT("sat-12l-sm")
if torch.cuda.is_available():
    # Half precision on the GPU, exactly as before.
    sat.half().to("cuda")
# Otherwise the model is left untouched (full precision, default device)
# instead of raising at import time on a host without CUDA.
def _resolve_upload_path(upload):
    """Return a path usable by ``open()`` for an uploaded-file value.

    Plain paths (``str`` or ``os.PathLike``) are returned unchanged; any other
    object is expected to expose its path through a ``name`` attribute.
    """
    import os  # local import keeps this block self-contained

    # Checked first on purpose: ``pathlib.Path.name`` is only the final path
    # component, so falling through to ``.name`` would lose the directory.
    if isinstance(upload, (str, os.PathLike)):
        return upload
    return upload.name


def segment_text(input_text, txt_file):
    """Segment text into sentences and return the result as a JSON string.

    Args:
        input_text: Text typed by the user. When it contains any
            non-whitespace character it is segmented as a single document
            and ``txt_file`` is ignored.
        txt_file: Optional uploaded file, given either as a path or as an
            object with a ``name`` attribute holding the path. Each
            non-blank line is segmented as its own document.

    Returns:
        A JSON string (2-space indent). Direct text is stored under the key
        ``"input_text"``; file lines are stored under ``"document_<n>"``,
        where ``<n>`` is the 1-based line number in the file (blank lines
        are skipped but still counted). Each value has the form
        ``{"segments": <result of sat.split>}``. With no usable input the
        result is ``"{}"``.
    """
    # NOTE(review): this file imports `spaces` but never applies `spaces.GPU`;
    # if the app targets ZeroGPU hardware this function presumably needs that
    # decorator -- confirm against the deployment.
    results = {}

    # Whitespace-only text counts as "no text" so that it neither reaches the
    # model nor hides an uploaded file.
    if input_text and input_text.strip():
        results["input_text"] = {"segments": sat.split(input_text)}
    elif txt_file is not None:
        # utf-8-sig reads plain UTF-8 too, but also drops a leading BOM that
        # would otherwise end up inside the first document.
        with open(_resolve_upload_path(txt_file), encoding="utf-8-sig") as handle:
            for line_no, raw_line in enumerate(handle, 1):
                line = raw_line.strip()
                if not line:
                    continue  # skip empty lines
                results[f"document_{line_no}"] = {"segments": sat.split(line)}

    return json.dumps(results, indent=2)
# Build the Gradio UI: a text box and an optional .txt upload are passed to
# segment_text, and its return value is shown in a JSON component.
text_box = gr.Textbox(lines=5, label="Input Text (Optional)")
file_upload = gr.File(
    label="Upload TXT file (Optional) Row-separated",
    file_types=[".txt"],
)

# Each example row is [text, file]; the examples only use the text input.
sample_inputs = [
    ["This is a test This is another test.", None],
    ["Hello this is a test But this is different now Now the next one starts looool", None],
    ["The quick brown fox jumps over the lazy dog It was the best of times, it was the worst of times", None],
]

iface = gr.Interface(
    fn=segment_text,
    inputs=[text_box, file_upload],
    outputs=gr.JSON(label="Segmented Text (JSON)"),
    title="Text Segmentation with SaT",
    description="This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. You can input text directly or upload a TXT file containing multiple documents (one per line). All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main",
    examples=sample_inputs,
)

# Start serving the app.
iface.launch()