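"""Plotting utilities for inspecting transformer models.

Includes helpers for visualizing attention maps, tokenization, PCA-projected
token embeddings, and a parameter-count comparison across the models listed
in model_utils.MODEL_OPTIONS.
"""
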
import matplotlib.pyplot as plt
import torch
from sklearn.decomposition import PCA

def plot_attention(tokens, attn_matrix):
    """Render a square attention matrix as a heatmap labelled with tokens."""
    fig, ax = plt.subplots(figsize=(8, 6))
    cax = ax.matshow(attn_matrix, cmap="viridis")
    fig.colorbar(cax)
    ax.set_xticks(range(len(tokens)))
    ax.set_yticks(range(len(tokens)))
    ax.set_xticklabels(tokens, rotation=90)
    ax.set_yticklabels(tokens)
    ax.set_title("Attention Map")
    plt.tight_layout()
    return fig
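

# Minimal usage sketch with made-up values: three tokens and a 3x3 attention
# matrix whose rows sum to 1. These numbers are illustrative only, not the
# output of any real model.
# plot_attention(["the", "cat", "sat"],
#                [[0.6, 0.2, 0.2], [0.1, 0.8, 0.1], [0.3, 0.3, 0.4]])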


def visualize_attention(tokenizer, model, text, layer_index, head_index):
    """Plot a single attention head from a single layer for the given text."""
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # Request attention weights explicitly; otherwise outputs.attentions is
        # None unless the model config already sets output_attentions=True.
        outputs = model(**inputs, output_attentions=True)

    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    attn = outputs.attentions[layer_index][0, head_index].detach().numpy()
    return plot_attention(tokens, attn)


def show_tokenization(tokenizer, text):
    """Show the tokenizer's subword split of the text as a labelled strip."""
    tokens = tokenizer.tokenize(text)
    fig, ax = plt.subplots(figsize=(8, 1))
    ax.imshow([[0] * len(tokens)], cmap="Pastel2", aspect="auto")
    ax.set_xticks(range(len(tokens)))
    ax.set_xticklabels(tokens, rotation=90)
    ax.set_yticks([])
    ax.set_title("Tokenization")
    return fig


def show_embeddings(tokenizer, model, text):
    """Scatter-plot the last hidden states projected onto two PCA components."""
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    embeddings = outputs.last_hidden_state[0].detach().numpy()
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(embeddings)

    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    fig, ax = plt.subplots()
    ax.scatter(reduced[:, 0], reduced[:, 1])

    for i, token in enumerate(tokens):
        ax.annotate(token, (reduced[i, 0], reduced[i, 1]))

    ax.set_title("Token Embeddings (PCA)")
    return fig


def compare_model_sizes():
    """Bar chart of parameter counts (in millions) for the models in MODEL_OPTIONS."""
    from model_utils import MODEL_OPTIONS
    from transformers import AutoModel

    model_names = list(MODEL_OPTIONS.values())
    sizes = []

    for name in model_names:
        try:
            model = AutoModel.from_pretrained(name)
            size = sum(p.numel() for p in model.parameters()) / 1e6
            sizes.append(size)
        except Exception:
            # Show a zero-height bar for models that fail to download or load.
            sizes.append(0.0)

    fig, ax = plt.subplots()
    ax.bar(list(MODEL_OPTIONS.keys()), sizes, color="skyblue")
    ax.set_ylabel("Parameters (Millions)")
    ax.set_title("Model Size Comparison")
    ax.tick_params(axis="x", rotation=45)
    plt.tight_layout()
    return fig
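

# A minimal, hedged usage sketch. The "bert-base-uncased" checkpoint below is an
# illustrative assumption, not something this module requires; any checkpoint
# loadable with AutoTokenizer/AutoModel should work the same way.
if __name__ == "__main__":
    from transformers import AutoModel, AutoTokenizer

    checkpoint = "bert-base-uncased"  # assumed example checkpoint
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    model.eval()

    sample = "The quick brown fox jumps over the lazy dog."
    show_tokenization(tokenizer, sample)
    visualize_attention(tokenizer, model, sample, layer_index=0, head_index=0)
    show_embeddings(tokenizer, model, sample)
    plt.show()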