# tokenizer/app.py
from fastapi import FastAPI
import spacy

app = FastAPI()

# A blank English pipeline: rule-based tokenizer only, no trained components,
# so no model download is required.
nlp = spacy.blank("en")


@app.post("/tokenize")
async def tokenize_text(text: str):
    """Tokenize `text` and return character and token offsets for each token."""
    doc = nlp(text)
    tokens = []
    for token in doc:
        tokens.append({
            "text": token.text,
            "start_char": token.idx,                  # character offset of token start
            "end_char": token.idx + len(token.text),  # character offset one past token end
            "start": token.i,                         # token index within the doc
            "end": token.i + 1,                       # exclusive token index
        })
    return tokens
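

# A minimal way to run and exercise the service locally (a sketch: assumes
# uvicorn is installed and this file is saved as app.py):
#
#   uvicorn app:app --reload
#   curl -X POST "http://127.0.0.1:8000/tokenize?text=Hello%20world"
#
# Because `text` is declared as a plain function parameter above, FastAPI
# reads it from the query string rather than from a JSON body.
if __name__ == "__main__":
    import uvicorn  # assumed dependency; FastAPI does not bundle a server

    uvicorn.run(app, host="127.0.0.1", port=8000)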