Spaces:

thak123
/

text-sentence-boundary-detection

Sleeping

thak123 committed on Feb 26

Commit

017a93f

verified ·

1 Parent(s): 5560575

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from punctuators.models import SBDModelONNX
 # This will download the ONNX and SPE models. To clean up, delete this model from your HF cache directory.
 m = SBDModelONNX.from_pretrained("sbd_multi_lang")
-def sentence_boundary_detection(input_texts):
     # Run inference
     results: List[List[str]] = m.infer([input_texts])
     print(results)
@@ -17,6 +17,27 @@ def sentence_boundary_detection(input_texts):
     return sentences, len(results[0])
 # Gradio interface
 iface = gr.Interface(
     fn=sentence_boundary_detection,

 # This will download the ONNX and SPE models. To clean up, delete this model from your HF cache directory.
 m = SBDModelONNX.from_pretrained("sbd_multi_lang")
+def sentence_boundary_detection_old(input_texts):
     # Run inference
     results: List[List[str]] = m.infer([input_texts])
     print(results)
     return sentences, len(results[0])
+import nltk
+import gradio as gr
+# Download the necessary NLTK data files
+nltk.download('punkt')
+# Load the Slovenian tokenizer
+slovenian_tokenizer = nltk.data.load('tokenizers/punkt/slovenian.pickle')
+def sentence_boundary_detection(text):
+    # Tokenize the text into sentences
+    sentences = slovenian_tokenizer.tokenize(text)
+    # Count the number of sentences
+    sentence_count = len(sentences)
+    # Join sentences with newlines for display
+    sentences_text = "\n".join(sentences)
+    return sentences_text, sentence_count
 # Gradio interface
 iface = gr.Interface(
     fn=sentence_boundary_detection,