thak123 committed
Commit 017a93f · verified · 1 Parent(s): 5560575

Update app.py

Files changed (1)
  1. app.py +22 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from punctuators.models import SBDModelONNX
 # This will download the ONNX and SPE models. To clean up, delete this model from your HF cache directory.
 m = SBDModelONNX.from_pretrained("sbd_multi_lang")
 
-def sentence_boundary_detection(input_texts):
+def sentence_boundary_detection_old(input_texts):
     # Run inference
     results: List[List[str]] = m.infer([input_texts])
     print(results)
@@ -17,6 +17,27 @@ def sentence_boundary_detection(input_texts):
     return sentences, len(results[0])
 
 
+import nltk
+import gradio as gr
+
+# Download the necessary NLTK data files
+nltk.download('punkt')
+
+# Load the Slovenian tokenizer
+slovenian_tokenizer = nltk.data.load('tokenizers/punkt/slovenian.pickle')
+
+def sentence_boundary_detection(text):
+    # Tokenize the text into sentences
+    sentences = slovenian_tokenizer.tokenize(text)
+
+    # Count the number of sentences
+    sentence_count = len(sentences)
+
+    # Join sentences with newlines for display
+    sentences_text = "\n".join(sentences)
+
+    return sentences_text, sentence_count
+
 # Gradio interface
 iface = gr.Interface(
     fn=sentence_boundary_detection,
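
As a quick sanity check of the new NLTK-based path, here is an illustrative sketch that is not part of the commit: NLTK's high-level sent_tokenize wrapper can stand in for loading the pickle by hand, and NLTK registers its bundled Punkt model for Slovenian under the language name 'slovene' (tokenizers/punkt/slovene.pickle), so the 'slovenian.pickle' path added above may be worth double-checking against the installed NLTK data. The function name and sample text below are hypothetical, chosen only to mirror the (sentences_text, sentence_count) return shape the Gradio interface expects.

# Minimal sketch (assumed names), exercising Slovenian sentence splitting outside the Gradio app.
import nltk
from nltk.tokenize import sent_tokenize

nltk.download('punkt')  # newer NLTK releases may additionally need nltk.download('punkt_tab')

def split_slovenian_sentences(text: str):
    # 'slovene' is the language name NLTK uses for its Slovenian Punkt model.
    sentences = sent_tokenize(text, language='slovene')
    # Mirror the app's return shape: newline-joined sentences plus a count.
    return "\n".join(sentences), len(sentences)

if __name__ == "__main__":
    sample = "To je prvi stavek. To je drugi stavek."
    joined, count = split_slovenian_sentences(sample)
    print(count)   # expected: 2
    print(joined)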