Aranwer committed
Commit 05f5674 · verified · 1 Parent(s): 005c98d

Update app.py

Files changed (1)
  1. app.py  +29 -26
app.py CHANGED
@@ -1,8 +1,10 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM, GPT2Model
+from transformers import AutoTokenizer, AutoModel, GPT2Model
 import torch
 import matplotlib.pyplot as plt
 import seaborn as sns
+from sklearn.decomposition import PCA
+import numpy as np
 
 MODEL_INFO = {
     "bert-base-uncased": {
@@ -28,23 +30,12 @@ MODEL_INFO = {
         "Layers": 12,
         "Attention Heads": 12,
         "Parameters": "124M"
-    },
-    "t5-small": {
-        "Model Type": "T5",
-        "Layers": 6,
-        "Attention Heads": 8,
-        "Parameters": "60M"
     }
 }
 
 def visualize_transformer(model_name, sentence):
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-    if "t5" in model_name:
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, output_attentions=True)
-        sentence = "translate English to English: " + sentence
-        inputs = tokenizer(sentence, return_tensors='pt')
-    elif "gpt2" in model_name:
+    if "gpt2" in model_name:
         model = GPT2Model.from_pretrained(model_name, output_attentions=True)
         tokenizer.pad_token = tokenizer.eos_token
         inputs = tokenizer(sentence, return_tensors='pt', padding=True)
@@ -69,26 +60,37 @@ def visualize_transformer(model_name, sentence):
     token_output = [f"{i + 1}: \"{tok}\"" for i, tok in enumerate(tokens)]
     token_output_str = "[\n" + "\n".join(token_output) + "\n]"
 
+    last_hidden_state = outputs.last_hidden_state.detach().numpy()[0]
+    pca = PCA(n_components=2)
+    reduced = pca.fit_transform(last_hidden_state)
+    fig2, ax2 = plt.subplots()
+    ax2.scatter(reduced[:, 0], reduced[:, 1])
+    for i, token in enumerate(tokens):
+        ax2.annotate(token, (reduced[i, 0], reduced[i, 1]))
+    ax2.set_title("Token Embedding (PCA Projection)")
+
     model_info = MODEL_INFO.get(model_name, {})
     details = f"""
 🛠 Model Details
 Model Type: {model_info.get("Model Type", "Unknown")}
-
-Number of Layers: {model_info.get("Layers", "?" )}
-
-Number of Attention Heads: {model_info.get("Attention Heads", "?" )}
-
-Total Parameters: {model_info.get("Parameters", "?" )}
+Number of Layers: {model_info.get("Layers", "?")}
+Number of Attention Heads: {model_info.get("Attention Heads", "?")}
+Total Parameters: {model_info.get("Parameters", "?")}
 
 📊 Tokenization Visualization
 Enter Text:
 {sentence}
-
 Tokenized Output:
 {token_output_str}
+
+📈 Model Size Comparison
+- BERT: 109M
+- RoBERTa: 125M
+- DistilBERT: 66M
+- GPT-2: 124M
 """
 
-    return details, fig
+    return details, fig, fig2
 
 model_list = list(MODEL_INFO.keys())
 
@@ -99,11 +101,12 @@
         gr.Textbox(label="Enter Input Sentence")
     ],
     outputs=[
-        gr.Textbox(label="🧠 Model + Token Info", lines=20),
-        gr.Plot(label="🧩 Attention Map")
+        gr.Textbox(label="🧠 Model + Token Info", lines=25),
+        gr.Plot(label="🧩 Attention Map"),
+        gr.Plot(label="🧬 Token Embedding (PCA Projection)")
     ],
-    title="Transformer Attention Visualizer",
-    description="Visualize attention heads of transformer models with detailed model and token information."
+    title="Transformer Visualization App",
+    description="Visualize Transformer models including token embeddings, attention maps, and model information."
 )
 
-iface.launch()
+iface.launch()
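
To exercise the PCA projection this commit adds without launching the Gradio app, the sketch below walks the same steps in isolation. It is a minimal, hedged reconstruction: the model name and sentence are arbitrary placeholders, and running the model under torch.no_grad() is an assumption, since the diff context does not show how outputs is produced in app.py.

import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Arbitrary example inputs (not taken from the commit).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased", output_attentions=True)
inputs = tokenizer("The quick brown fox jumps over the lazy dog", return_tensors="pt")

# Assumption: a plain forward pass with gradients disabled.
with torch.no_grad():
    outputs = model(**inputs)

tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

# Mirror the fig2 block added in this commit: project each token's
# final hidden state (768-d for BERT-base) down to 2-D with PCA.
last_hidden_state = outputs.last_hidden_state[0].numpy()
reduced = PCA(n_components=2).fit_transform(last_hidden_state)

fig, ax = plt.subplots()
ax.scatter(reduced[:, 0], reduced[:, 1])
for i, token in enumerate(tokens):
    ax.annotate(token, (reduced[i, 0], reduced[i, 1]))
ax.set_title("Token Embedding (PCA Projection)")
plt.show()

PCA is a reasonable choice here because it is linear, deterministic, and fast enough to rerun on every request; a nonlinear method such as t-SNE would separate token clusters more sharply but is slower and stochastic across runs.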