tokenizer / app.py
wjm55 · Fix tokenize_text function to correct token end index and remove unused 'supervision' package from requirements.txt (c4a8604)
from fastapi import FastAPI
from pydantic import BaseModel
import spacy

app = FastAPI()

# Blank English pipeline: tokenizer only, no tagger/parser/NER components.
nlp = spacy.blank("en")


class TokenizeRequest(BaseModel):
    text: str


@app.post("/tokenize")
async def tokenize_text(request: TokenizeRequest):
    doc = nlp(request.text)
    tokens = []
    for token in doc:
        tokens.append({
            "text": token.text,
            "start_char": token.idx,                  # offset of the token's first character
            "end_char": token.idx + len(token.text),  # offset one past the token's last character
            "start": token.i,                         # token index within the doc
            "end": token.i,                           # same index: each entry spans a single token
        })
    return tokens
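
A minimal sketch of exercising the endpoint with FastAPI's TestClient (the client code and the sample sentence are illustrative, not part of the Space; TestClient needs the httpx package installed):

from fastapi.testclient import TestClient

client = TestClient(app)

# POST a sentence and inspect the per-token offsets the endpoint returns.
resp = client.post("/tokenize", json={"text": "Hello world"})
print(resp.json())
# [{'text': 'Hello', 'start_char': 0, 'end_char': 5, 'start': 0, 'end': 0},
#  {'text': 'world', 'start_char': 6, 'end_char': 11, 'start': 1, 'end': 1}]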