import streamlit as st
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from transformers import AutoModel, AutoTokenizer, pipeline, AutoModelForCausalLM

st.title("Transformer Model Explorer")
st.markdown("""
Explore Transformer models interactively: their architectures, tokenization, and attention mechanisms.
""")

# The full bigscience/bloom checkpoint has ~176B parameters and will not fit on
# a typical demo machine; the 560M-parameter variant keeps the app runnable.
model_name = st.selectbox(
    "Choose a Transformer model:",
    ["bigscience/bloom-560m", "openai/whisper-base", "facebook/wav2vec2-base-960h"]
)
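
# A minimal caching sketch, an addition to the original app (assumes a Streamlit
# version that provides st.cache_resource): without it, every widget interaction
# reruns the script and reloads the checkpoint from scratch.
@st.cache_resource
def load_model_and_tokenizer(name: str):
    # AutoTokenizer falls back to a slow tokenizer when no fast variant exists
    # (e.g. for wav2vec2), so one code path covers all three checkpoints.
    tokenizer = AutoTokenizer.from_pretrained(name, use_fast=True)
    model = AutoModel.from_pretrained(name)
    return tokenizer, model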

st.write(f"Loading model: `{model_name}`...")
tokenizer, model = load_model_and_tokenizer(model_name)

st.subheader("📌 Model Details")
st.write(f"Model Type: `{model.config.model_type}`")
# Some configs name these fields differently (e.g. Whisper uses encoder_layers),
# so fall back to 'N/A' rather than crashing on a missing attribute.
st.write(f"Number of Layers: `{getattr(model.config, 'num_hidden_layers', 'N/A')}`")
st.write(f"Number of Attention Heads: `{getattr(model.config, 'num_attention_heads', 'N/A')}`")
st.write(f"Total Parameters: `{sum(p.numel() for p in model.parameters())/1e6:.2f}M`")

st.subheader("📊 Model Size Comparison")
# Approximate published parameter counts, in millions.
model_sizes = {
    "bigscience/bloom-560m": 560,
    "openai/whisper-base": 74,
    "facebook/wav2vec2-base-960h": 95
}
df_size = pd.DataFrame(model_sizes.items(), columns=["Model", "Size (Million Parameters)"])
fig = px.bar(df_size, x="Model", y="Size (Million Parameters)", title="Model Size Comparison")
st.plotly_chart(fig)

st.subheader("🔍 Tokenization Visualization")
input_text = st.text_input("Enter Text:", "Hello, how are you?")

if "whisper" in model_name:
    st.write("Note: Whisper's tokenizer serves its text decoder;")
    st.write("the encoder consumes raw audio waveforms, not text tokens.")
else:
    tokens = tokenizer.tokenize(input_text)
    st.write("Tokenized Output:", tokens)
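    # Small addition to the original app: also show the integer IDs the model
    # actually consumes (convert_tokens_to_ids is standard transformers API).
    st.write("Token IDs:", tokenizer.convert_tokens_to_ids(tokens))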

st.subheader("🧩 Token Embeddings Visualization")
with torch.no_grad():
    if "whisper" in model_name or "wav2vec2" in model_name:
        # Both audio models expect waveforms (input_values), so a text
        # forward pass would fail here.
        st.write("Note: audio models embed waveform features rather than text tokens,")
        st.write("so token embeddings cannot be visualized as with text models.")
    else:
        inputs = tokenizer(input_text, return_tensors="pt")
        outputs = model(**inputs)
        if hasattr(outputs, "last_hidden_state"):
            embeddings = outputs.last_hidden_state.squeeze(0).numpy()

            # Align token labels with embedding rows (special tokens can make
            # the two lengths differ).
            n_tokens = min(len(tokens), embeddings.shape[0])
            embeddings = embeddings[:n_tokens]
            tokens = tokens[:n_tokens]
            # Project the hidden states down to 2-D for plotting.
            pca = PCA(n_components=2)
            reduced_embeddings = pca.fit_transform(embeddings)
            df_embeddings = pd.DataFrame(reduced_embeddings, columns=["PCA1", "PCA2"])
            df_embeddings["Token"] = tokens
            fig = px.scatter(df_embeddings, x="PCA1", y="PCA2", text="Token",
                             title="Token Embeddings (PCA Projection)")
            st.plotly_chart(fig)
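            # Extra readout, an addition to the original app: how much of the
            # embedding variance the 2-D projection keeps
            # (explained_variance_ratio_ is standard scikit-learn API).
            st.caption(f"PCA keeps {pca.explained_variance_ratio_.sum():.1%} of the embedding variance")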

if "bloom" in model_name:
    st.subheader("🔥 Attention Map")
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)
        # Final layer's attention: (num_heads, seq_len, seq_len) after
        # dropping the batch dimension.
        attention = outputs.attentions[-1].squeeze(0).numpy()
    fig, ax = plt.subplots(figsize=(10, 5))
    # Heatmap of the first attention head in the final layer.
    sns.heatmap(attention[0], cmap="viridis", xticklabels=tokens, yticklabels=tokens, ax=ax)
    st.pyplot(fig)
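    # Complementary view, an addition to the original app: the same map
    # averaged over all heads of the final layer.
    fig_avg, ax_avg = plt.subplots(figsize=(10, 5))
    sns.heatmap(attention.mean(axis=0), cmap="viridis", xticklabels=tokens, yticklabels=tokens, ax=ax_avg)
    ax_avg.set_title("Final-layer attention averaged over heads")
    st.pyplot(fig_avg)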

if "bloom" in model_name:
    st.subheader("✍️ Text Generation & Token Probabilities")
    # Load the checkpoint with a causal-LM head once and reuse it for both the
    # pipeline and the probability readout below.
    model_gen = AutoModelForCausalLM.from_pretrained(model_name)
    generator = pipeline("text-generation", model=model_gen, tokenizer=tokenizer, return_full_text=False)
    # return_full_text=False keeps only the newly generated continuation.
    generated_output = generator(input_text, max_new_tokens=30)
    st.write("Generated Output:", generated_output[0]["generated_text"])

    with torch.no_grad():
        inputs = tokenizer(input_text, return_tensors="pt")
        # Logits for the token that would follow the prompt.
        logits = model_gen(**inputs).logits[:, -1, :]
    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze().numpy()
    # Ten most likely next tokens, most probable first.
    top_tokens = np.argsort(probs)[-10:][::-1]
    token_probs = {tokenizer.decode([int(idx)]): probs[idx] for idx in top_tokens}
    df_probs = pd.DataFrame(token_probs.items(), columns=["Token", "Probability"])
    fig_prob = px.bar(df_probs, x="Token", y="Probability", title="Top Token Predictions")
    st.plotly_chart(fig_prob)

st.markdown("💡 *Explore more about Transformer models!*")