wannaphong committed on
Commit
dd4d11e
·
verified ·
1 Parent(s): 41e295e

Update routers/tokenize.py

Browse files
Files changed (1) hide show
  1. routers/tokenize.py +7 -1
routers/tokenize.py CHANGED
@@ -42,16 +42,22 @@ class SentTokenizeEngine(BaseModel):
42
  @router.post('/word_tokenize', response_model=WordTokenizeResponse)
43
  def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
44
  """
45
- Word tokenize
46
  """
47
  return {"words": py_word_tokenize(text=text, engine=engine)}
48
 
49
 
50
  @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
51
  def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
 
 
 
52
  return {"subwords": py_subword_tokenize(text=text, engine=engine)}
53
 
54
 
55
  @router.post('/sent_tokenize', response_model=SentTokenizeEngine)
56
  def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
 
 
 
57
  return {"sents": py_sent_tokenize(text=text, engine=engine)}
 
42
  @router.post('/word_tokenize', response_model=WordTokenizeResponse)
43
  def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
44
  """
45
+ Word tokenize or word segmentation for Thai language
46
  """
47
  return {"words": py_word_tokenize(text=text, engine=engine)}
48
 
49
 
50
  @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
51
  def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
52
+ """
53
+ subword tokenize or subword segmentation for Thai language
54
+ """
55
  return {"subwords": py_subword_tokenize(text=text, engine=engine)}
56
 
57
 
58
  @router.post('/sent_tokenize', response_model=SentTokenizeEngine)
59
  def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
60
+ """
61
+ Thai sentence segmentation
62
+ """
63
  return {"sents": py_sent_tokenize(text=text, engine=engine)}