# Hugging Face Space (status shown on the page at capture time: Runtime error)
import gradio as gr
import torch
from transformers import BertTokenizer, BertModel
# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same pretrained weights/vocabulary.
MODEL_NAME = "bert-base-uncased"

# Load the BERT (uncased) tokenizer & model once, at import time, so every
# request handled by the app reuses the same objects.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)
def preprocess(text: str) -> tuple:
    """Clean *text*, tokenize it with BERT, and return tokens plus embeddings.

    The input is stripped of non-ASCII characters and lowercased before being
    passed to the module-level ``tokenizer``; the encoded ids are then run
    through the module-level ``model`` without gradient tracking.

    Args:
        text: Raw user input. ``None`` is treated as an empty string.

    Returns:
        A ``(tokens, embeddings)`` pair: ``tokens`` is the list of token
        strings for the encoded ids (including whatever special tokens the
        tokenizer added), and ``embeddings`` is a nested list with one
        hidden-state vector per token.
    """
    if text is None:
        # Guard against a cleared/empty input widget handing us None.
        text = ""

    # Remove non-ASCII characters and lowercase.
    cleaned = text.encode("ascii", "ignore").decode().lower()

    # Tokenize. Truncate to the tokenizer's configured maximum length so that
    # very long inputs do not exceed what the model can accept.
    inputs = tokenizer(cleaned, return_tensors="pt", truncation=True)
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # Get embeddings; inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # last_hidden_state: [1, seq_len, hidden_size] -> drop the batch dim and
    # convert to plain Python lists for the UI.
    embeddings = outputs.last_hidden_state[0].tolist()
    return tokens, embeddings
# Gradio UI: one free-text input, two table outputs fed by the pair that
# `preprocess` returns (tokens first, embeddings second).
iface = gr.Interface(
    fn=preprocess,
    inputs=gr.Textbox(lines=3, placeholder="Type something…"),
    outputs=[
        gr.Dataframe(headers=["token"], label="Tokens"),
        gr.Dataframe(label="Embeddings"),
    ],
    title="BERT Tokenizer + Embeddings",
    description="Cleans input, lowercases it, then shows BERT tokens & their hidden‑state vectors.",
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()