Commit 9dbb134 (parent: 1bc58c6) — "Update code"

Files changed (Browse files):
- routers/soundex.py   +6  -2
- routers/spell.py     +11 -3
- routers/tokenize.py  +14 -4
- routers/util.py      +16 -4
routers/soundex.py (CHANGED)

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-
+import json
+from fastapi import APIRouter, Response
 from pythainlp.soundex import (
     soundex as py_soundex
 )
@@ -25,4 +26,7 @@ def soundex(word: str, engine: SoundexEngine = "udom83"):
     - **word**: A word that want into phonetic code.
     - **engine**: Soundex Engine (default is udom83)
     """
-    return
+    return Response(
+        json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )
routers/spell.py (CHANGED)

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-
+import json
+from fastapi import APIRouter, Response
 from pythainlp.spell import (
     correct as py_correct,
     spell as py_spell
@@ -33,7 +34,11 @@ def correct(word: float, engine: CorrectEngine = "pn"):
     - **word**: A word that want corrects the spelling of the given word.
     - **engine**: Correct Engine (default is pn)
     """
-    return
+    return Response(
+        json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )
+

 @router.post('/spell', response_model=SpellResponse)
 def spell(word: float, engine: SpellEngine = "pn"):
@@ -45,4 +50,7 @@ def spell(word: float, engine: SpellEngine = "pn"):
     - **word**: A word that want to check spell.
     - **engine**: Spell Engine (default is pn)
     """
-    return
+    return Response(
+        json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )
routers/tokenize.py (CHANGED)

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-
+import json
+from fastapi import APIRouter, Response
 from pythainlp.tokenize import (
     word_tokenize as py_word_tokenize,
     subword_tokenize as py_subword_tokenize,
@@ -49,7 +50,10 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
     - **text**: Text that want to tokenize.
     - **engine**: Word Tokenize Engine (default is newmm)
     """
-    return
+    return Response(
+        json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )


 @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
@@ -62,7 +66,10 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
     - **text**: Text that want to tokenize.
     - **engine**: Sub word Tokenize Engine (default is tcc)
     """
-    return
+    return Response(
+        json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )


 @router.post('/sent_tokenize', response_model=SentTokenizeEngine)
@@ -75,4 +82,7 @@ def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
     - **text**: Text that want to tokenize.
     - **engine**: Sentence Tokenize Engine (default is crfcut)
     """
-    return
+    return Response(
+        json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
+        media_type="application/json",
+    )
routers/util.py (CHANGED)

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-
+import json
+from fastapi import APIRouter, Response
 from pythainlp.util import (
     bahttext as py_bahttext,
     normalize as py_normalize,
@@ -12,18 +13,29 @@ def bahttext(number: float):
     """
     This api converts a number to Thai text and adds a suffix “บาท” (Baht).
     """
-    return
+    return Response(
+        json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
+        media_type="application/json",
+    )
+

 @router.post('/normalize')
 def normalize(text: str):
     """
     Normalize and clean Thai text
     """
-    return
+    return Response(
+        json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
+        media_type="application/json",
+    )
+

 @router.post('/tone_detector')
 def tone_detector(syllable: str):
     """
     Thai tone detector for word.
     """
-    return
+    return Response(
+        json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
+        media_type="application/json",
+    )