Commit b9def7b · Parent: f7003d5

Update docs

Changed files:
- app.py (+1 -1)
- routers/soundex.py (+5 -0)
- routers/tokenize.py (+16 -1)
app.py
CHANGED

@@ -26,7 +26,7 @@ app = FastAPI(
     # },
     license_info={
         "name": "Apache 2.0",
-        "
+        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
     },
 )
 
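For reference, `license_info` is standard FastAPI OpenAPI metadata. A minimal, self-contained sketch of the pattern this hunk completes is below; the `title` value is a placeholder and is not taken from app.py.

```python
from fastapi import FastAPI

# Minimal sketch of FastAPI license metadata, mirroring the hunk above.
# "Example API" is a placeholder title; only license_info reflects the change.
app = FastAPI(
    title="Example API",
    license_info={
        "name": "Apache 2.0",
        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
    },
)
```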
routers/soundex.py
CHANGED

@@ -19,5 +19,10 @@ class SoundexEngine(str, Enum):
 def soundex(text: str, engine: SoundexEngine = "udom83"):
     """
     This api converts Thai text into phonetic code.
+
+    ## Input
+
+    - **text**: A word to convert into a phonetic code.
+    - **engine**: Soundex Engine (default is udom83)
     """
     return {"soundex": py_soundex(text=text, engine=engine)}
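The route above delegates to `py_soundex`. Assuming that alias points at `pythainlp.soundex.soundex` (the import is not shown in this diff), the underlying call looks roughly like this:

```python
from pythainlp.soundex import soundex

# Sketch of the call the /soundex route appears to wrap; the engine value
# mirrors the default ("udom83") named in the docstring above.
code = soundex("บูรณะ", engine="udom83")
print(code)  # prints the Udom83 phonetic code for the word
```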
routers/tokenize.py
CHANGED

@@ -43,6 +43,11 @@ class SentTokenizeEngine(BaseModel):
 def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
     """
     Word tokenize or word segmentation for Thai language
+
+    ## Input
+
+    - **text**: Text to tokenize.
+    - **engine**: Word Tokenize Engine (default is newmm)
     """
     return {"words": py_word_tokenize(text=text, engine=engine)}
 
@@ -50,7 +55,12 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
 @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
 def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
     """
-
+    Subword tokenize or subword segmentation for Thai language
+
+    ## Input
+
+    - **text**: Text to tokenize.
+    - **engine**: Subword Tokenize Engine (default is tcc)
     """
     return {"subwords": py_subword_tokenize(text=text, engine=engine)}
 
@@ -59,5 +69,10 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
 def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
     """
     Thai sentence segmentation
+
+    ## Input
+
+    - **text**: Text to tokenize.
+    - **engine**: Sentence Tokenize Engine (default is crfcut)
     """
     return {"sents": py_sent_tokenize(text=text, engine=engine)}
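These three routes delegate to the `py_*` tokenizer aliases. Assuming those map onto `pythainlp.tokenize` (the imports are not shown in this diff), the underlying calls with the default engines named in the docstrings look roughly like this:

```python
from pythainlp.tokenize import sent_tokenize, subword_tokenize, word_tokenize

# Sketch of the calls the tokenize routes appear to wrap, using the default
# engines documented above (newmm, tcc, crfcut).
text = "ผมรักภาษาไทย"  # hypothetical input: "I love the Thai language"

print(word_tokenize(text, engine="newmm"))      # list of words
print(subword_tokenize(text, engine="tcc"))     # list of Thai character clusters
print(sent_tokenize(text, engine="crfcut"))     # list of sentences
```

Since the handlers declare plain `str`/enum parameters, FastAPI should expose `text` and `engine` as query parameters on these POST routes.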