borutokarma123 committed
Commit 0cdb8e0 · verified · 1 Parent(s): 46d2acf

Update app.py

Files changed (1)
  1. app.py +18 -29
app.py CHANGED
@@ -9,15 +9,15 @@ from sklearn.decomposition import PCA
 from transformers import AutoModel, AutoTokenizer, pipeline, AutoModelForCausalLM
 
 # App Title
-st.title("🚀 Large Language Model Explorer")
+st.title("🚀 Vision Transformer Explorer")
 st.markdown("""
-Large Language models, their architectures, tokenization, and attention mechanisms.
+Explore Vision Transformers, their architectures, and tokenization mechanisms.
 """)
 
-#Selection
+# Model Selection
 model_name = st.selectbox(
-    "Select Large Language Model:",
-    ["gpt-j-6b", "opt-175b", "bigscience/bloom-176b"]
+    "Choose a Vision Transformer Model:",
+    ["beit-base-patch16", "swin-base-patch4-window7", "vit-base-patch16"]
 )
 
 # Load Tokenizer & Model
@@ -35,12 +35,12 @@ st.write(f"Total Parameters: `{sum(p.numel() for p in model.parameters())/1e6:.2
 # Model Size Comparison
 st.subheader("📊 Model Size Comparison")
 model_sizes = {
-    "gpt-j-6b": 6,
-    "opt-175b": 175,
-    "bigscience/bloom-176b": 176
+    "beit-base-patch16": 86,
+    "swin-base-patch4-window7": 87,
+    "vit-base-patch16": 86
 }
-df_size = pd.DataFrame(model_sizes.items(), columns=["Model", "Size (Billion Parameters)"])
-fig = px.bar(df_size, x="Model", y="Size (Billion Parameters)", title="Model Size Comparison")
+df_size = pd.DataFrame(model_sizes.items(), columns=["Model", "Size (Million Parameters)"])
+fig = px.bar(df_size, x="Model", y="Size (Million Parameters)", title="Model Size Comparison")
 st.plotly_chart(fig)
 
 # Tokenization Section
@@ -68,24 +68,13 @@ with torch.no_grad():
                  title="Token Embeddings (PCA Projection)")
 st.plotly_chart(fig)
 
-# Text Generation Demo
-st.subheader("✍️ Text Generation & Token Probabilities")
-model_gen = AutoModelForCausalLM.from_pretrained(model_name)
-generator = pipeline("text-generation", model=model_name, return_full_text=False)
-
-# Generate text
-generated_output = generator(input_text, max_length=50, return_tensors=True)
-st.write("Generated Output:", generated_output[0]["generated_text"])
-
-# Token Probability Visualization
+# Attention Visualization
+st.subheader("🔍 Attention Map")
 with torch.no_grad():
-    inputs = tokenizer(input_text, return_tensors="pt")
-    logits = model_gen(**inputs).logits[:, -1, :]
-    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze().detach().numpy()
-    top_tokens = np.argsort(probs)[-10:][::-1]  # Top 10 tokens
-    token_probs = {tokenizer.decode([idx]): probs[idx] for idx in top_tokens}
-    df_probs = pd.DataFrame(token_probs.items(), columns=["Token", "Probability"])
-    fig_prob = px.bar(df_probs, x="Token", y="Probability", title="Top Token Predictions")
-    st.plotly_chart(fig_prob)
+    outputs = model(**inputs, output_attentions=True)
+    attention = outputs.attentions[-1].squeeze().detach().numpy()
+    fig, ax = plt.subplots(figsize=(10, 5))
+    sns.heatmap(attention[0], cmap="viridis", xticklabels=tokens, yticklabels=tokens, ax=ax)
+    st.pyplot(fig)
 
-st.markdown("💡 *Explore more about Large Language Models!*\n")
+st.markdown("💡 *Explore Vision Transformers!*\n")
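For reference, the context line in the second hunk shows the app already reporting a live parameter count via `sum(p.numel() for p in model.parameters())/1e6`, and the new hard-coded `model_sizes` values (86, 87, 86) are consistent with the published sizes of these base-size vision checkpoints. A minimal sketch of the same count, assuming `google/vit-base-patch16-224` as the full Hub ID behind the shorthand `vit-base-patch16`:

```python
# Sketch (not part of the commit): reproducing the parameter count shown in the app.
# The Hub ID below is an assumption; the select-box strings are shorthand names.
from transformers import AutoModel

model = AutoModel.from_pretrained("google/vit-base-patch16-224")
millions = sum(p.numel() for p in model.parameters()) / 1e6
print(f"Total Parameters: {millions:.2f}M")  # roughly 86M for ViT-Base
```

Note that the shorthand names in the select box are not complete Hub repo IDs, so the unchanged `from_pretrained(model_name)` calls would likely need full IDs such as `microsoft/beit-base-patch16-224` and `microsoft/swin-base-patch4-window7-224`.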
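The new attention hunk reuses `inputs` and `tokens` from the app's earlier, unchanged tokenization section, which are produced by a text tokenizer; a vision checkpoint instead expects pixel inputs from an image processor, and its attention axes index [CLS] plus image patches rather than word pieces. A minimal sketch of producing the same heatmap end to end, assuming the full Hub ID above and a hypothetical local image `example.jpg`:

```python
# Sketch (not part of the commit): driving the new attention-map code with image
# inputs. "example.jpg" and the Hub ID are assumptions for illustration.
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from transformers import AutoImageProcessor, AutoModel

model_name = "google/vit-base-patch16-224"
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

image = Image.open("example.jpg")
inputs = processor(images=image, return_tensors="pt")  # pixel_values, not token IDs

with torch.no_grad():
    outputs = model(**inputs, output_attentions=True)

# Last layer attentions: (batch, heads, seq, seq); seq = 197 here
# ([CLS] + 14x14 patches for a 224x224 image at patch size 16).
attention = outputs.attentions[-1].squeeze(0).numpy()

fig, ax = plt.subplots(figsize=(10, 5))
sns.heatmap(attention[0], cmap="viridis", ax=ax)  # head 0; patch indices as ticks
ax.set_title("Attention Map (last layer, head 0)")
plt.show()
```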