File size: 1,863 Bytes
8dd9a23
 
 
 
 
809eaa8
8dd9a23
 
04ca745
a6de682
8dd9a23
a6de682
 
 
 
04ca745
 
 
 
 
 
 
a6de682
 
 
8dd9a23
 
 
 
 
 
a6de682
 
04ca745
a6de682
8dd9a23
 
04ca745
8dd9a23
04ca745
 
 
 
8dd9a23
 
 
 
0ca633f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json

import gradio as gr
import torch
from wtpsplit import SaT

# Initialize the SaT model from the "sat-12l-sm" checkpoint.
sat = SaT("sat-12l-sm")
# Use half precision on the GPU only when CUDA is actually usable. On a
# CPU-only host the unconditional .to("cuda") would raise at import time, so
# in that case the model is left where SaT() put it, in its default precision.
if torch.cuda.is_available():
    sat.half().to("cuda")

def segment_text(input_text, multi_doc_input):
    """Segment the given text(s) with the SaT model and return JSON text.

    Args:
        input_text: A single text to segment. Skipped when falsy (empty
            string or None).
        multi_doc_input: Several documents separated by newlines. Each
            line is stripped and blank lines are dropped. Skipped when falsy.

    Returns:
        A JSON string (2-space indent). The single text, if given, is
        stored under "input_text"; each non-blank document line is stored
        under "row_<k>", numbered from 1 in input order. Every value has
        the form {"segments": <result of sat.split(...)>}. When neither
        input yields anything to segment, the result is "{}".
    """
    segmented = {}

    # Single free-form text.
    if input_text:
        segmented["input_text"] = {"segments": sat.split(input_text)}

    # One document per line; numbering counts only the non-blank lines.
    if multi_doc_input:
        non_blank_docs = filter(None, map(str.strip, multi_doc_input.split('\n')))
        for row_number, document in enumerate(non_blank_docs, start=1):
            segmented[f"row_{row_number}"] = {"segments": sat.split(document)}

    return json.dumps(segmented, indent=2)

# Build the Gradio UI: two optional text boxes are passed to segment_text,
# and the JSON string it returns is displayed in a JSON output component.
iface = gr.Interface(
    title="Text Segmentation with SaT",
    description=(
        "This app uses the SaT (Segment any Text) model to split input text into "
        "sentences and return the result as JSON. You can input text directly or "
        "provide multiple documents (one per line). All credits to the respective "
        "author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main"
    ),
    fn=segment_text,
    inputs=[
        gr.Textbox(label="Input Text (Optional)", lines=5),
        gr.Textbox(label="Multiple Documents (Optional, one per line)", lines=10),
    ],
    outputs=gr.JSON(label="Segmented Text (JSON)"),
    # Each example row is [single input text, multi-document text].
    examples=[
        ["This is a test This is another test.", ""],
        ["Hello this is a test But this is different now Now the next one starts looool", ""],
        [
            "The quick brown fox jumps over the lazy dog "
            "It was the best of times, it was the worst of times",
            "",
        ],
        [
            "",
            (
                "Document 1 first sentence Document 1 second sentence\n"
                "Document 2 only sentence\n"
                "Document 3 first Document 3 second"
            ),
        ],
    ],
)

# Launch the app. No arguments are passed, so Gradio's default launch
# settings apply. This runs at module top level, i.e. whenever the file is
# executed or imported.
iface.launch()