from fastapi import FastAPI
from pydantic import BaseModel
import spacy

app = FastAPI()
# Blank English pipeline: tokenizer only, no tagger/parser/NER is needed for tokenization.
nlp = spacy.blank("en")

class TokenizeRequest(BaseModel):
    text: str

@app.post("/tokenize")
async def tokenize_text(request: TokenizeRequest):
    doc = nlp(request.text)
    tokens = []
    for token in doc:
        tokens.append({
            "text": token.text,
            # Character offsets into the original text (end is exclusive).
            "start_char": token.idx,
            "end_char": token.idx + len(token.text),
            # Token indices within the doc (end is exclusive, matching spaCy Span semantics).
            "start": token.i,
            "end": token.i + 1
        })
    return tokens
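

# A minimal local usage sketch (assumption: not part of the original file) using
# FastAPI's TestClient to exercise the /tokenize endpoint without starting a server.
if __name__ == "__main__":
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.post("/tokenize", json={"text": "Hello world"})
    # Expected shape: a list of token dicts with text, char offsets, and token indices.
    print(response.json())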