# BertExample / app.py
# eaglelandsonce's picture
# Create app.py
# 86ad0fb verified
import gradio as gr
import torch
from transformers import BertTokenizer, BertModel
# The tokenizer and the encoder are both loaded from the same pretrained
# uncased BERT checkpoint, once, when the module is imported.
_CHECKPOINT = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(_CHECKPOINT)
model = BertModel.from_pretrained(_CHECKPOINT)
def preprocess(text: str):
    """Tokenize *text* with BERT and return its tokens and hidden-state vectors.

    The input is stripped of non-ASCII characters and lowercased before it
    is tokenized. The encoded sequence is truncated to the tokenizer's
    maximum length so that very long inputs cannot overflow the model's
    position embeddings.

    Args:
        text: Raw user text. ``None`` is treated as an empty string.

    Returns:
        A ``(tokens, embeddings)`` tuple. ``tokens`` is the list of token
        strings for the encoded sequence; ``embeddings`` is a nested list
        with one ``last_hidden_state`` vector per token.
    """
    # A cleared input may arrive as None; handle it like empty text instead
    # of failing on ``.encode``.
    if text is None:
        text = ""

    # Drop every character outside ASCII, then lowercase what is left.
    cleaned = text.encode("ascii", "ignore").decode().lower()

    # truncation=True caps the sequence at the tokenizer's configured maximum
    # length; without it, long inputs raise inside the model forward pass.
    inputs = tokenizer(cleaned, return_tensors="pt", truncation=True)
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # last_hidden_state is [1, seq_len, hidden_size]; index 0 drops the
    # batch dimension before converting to plain Python lists.
    embeddings = outputs.last_hidden_state[0].tolist()
    return tokens, embeddings
# Input widget: a three-line free-text box.
_text_input = gr.Textbox(lines=3, placeholder="Type something…")

# Output widgets: one table for the tokens, one for their vectors.
_token_table = gr.Dataframe(headers=["token"], label="Tokens")
_embedding_table = gr.Dataframe(label="Embeddings")

# Wire the preprocessing function to the widgets above.
iface = gr.Interface(
    fn=preprocess,
    inputs=_text_input,
    outputs=[_token_table, _embedding_table],
    title="BERT Tokenizer + Embeddings",
    description="Cleans input, lowercases it, then shows BERT tokens & their hidden‑state vectors.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()