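"""Gradio app for exploring feature activations in microsoft/Phi-3-mini-4k-instruct.

Reads per-layer activation caches and a token table from parquet files under
weights/, and for a chosen revision/layer/feature renders the strongest-activating
token contexts as highlighted text, alongside AutoInterp and SelfExplain
explanations from the kisate-team/feature-explanations dataset.
"""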
import gradio as gr
import pyarrow.parquet as pq
import pyarrow.compute as pc
from transformers import AutoTokenizer
from datasets import load_dataset
import os
import numpy as np
# Flat table of the cached token stream; sliced by position below to rebuild contexts
token_table = pq.read_table("weights/tokens.parquet")

cache_path = "weights/caches"
parquets = os.listdir(cache_path)

TOKENIZER = "microsoft/Phi-3-mini-4k-instruct"

dataset = load_dataset("kisate-team/feature-explanations", split="train")
def find_revisions():
    """Extract revision numbers from cache filenames ("phi-l{layer}-r{revision}-...")."""
    revisions = set()
    for parquet in parquets:
        if parquet.endswith(".parquet"):
            parts = parquet.split("-")
            if len(parts) > 2:
                # parts[2] looks like "r{revision}"
                revisions.add(int(parts[2][1:]))
    return sorted(revisions)
def find_layers(revision):
    """Collect the layers that have a cache file for the given revision."""
    layers = set()
    for parquet in parquets:
        if parquet.endswith(".parquet"):
            parts = parquet.split("-")
            if len(parts) > 2 and int(parts[2][1:]) == revision:
                # parts[1] looks like "l{layer}"
                layers.add(int(parts[1][1:]))
    return sorted(layers)
revisions = find_revisions()
layers = {revision: find_layers(revision) for revision in revisions}
features = {
    revision: {
        layer: {
            item["feature"]: item
            for item in dataset
            if item["layer"] == layer and item["version"] == revision
        }
        for layer in layers[revision]
    }
    for revision in revisions
}
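# Illustrative (hypothetical indices): features[4][16] maps each feature id to its
# dataset row; the row's "explanation" (AutoInterp) and "gen_explanations"
# (SelfExplain) fields are surfaced in the UI below.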
# layers = dataset.unique("layer")

nearby = 8      # context tokens shown on each side of an activating token
stride = 0.25   # activation quantization step (matches the "st0.25" in cache filenames)
n_bins = 10     # number of bins in the color-map legend
def make_cache_name(layer, revision):
    return f"{cache_path}/phi-l{layer}-r{revision}-st0.25x128-activations.parquet"
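# For illustration (hypothetical arguments): make_cache_name(16, 4) returns
# "weights/caches/phi-l16-r4-st0.25x128-activations.parquet".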
with gr.Blocks() as demo:
    feature_table = gr.State(None)

    tokenizer_name = gr.Textbox(TOKENIZER, label="Tokenizer")
    revision_dropdown = gr.Dropdown(revisions, label="Revision")
    layer_dropdown = gr.Dropdown(layers[4], label="Layer")  # defaults to revision 4's layers

    def update_features(revision, layer):
        # `features` is keyed by revision first, then layer
        return gr.Dropdown(list(features[revision][layer].keys()))

    def update_layers(revision):
        return gr.Dropdown(layers[revision])

    frequency = gr.Number(0, label="Total frequency (%)")

    # layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
    # histogram = gr.LinePlot(x="activation", y="freq")

    revision_dropdown.input(update_layers, revision_dropdown, layer_dropdown)

    feature_input = gr.Number(0, label="Feature")
    autoi_expl = gr.Textbox(label="AutoInterp Explanation")
    selfe_expl = gr.Textbox(label="SelfExplain Explanation")

    cm = gr.HighlightedText()
    frame = gr.HighlightedText()
    def update(revision, layer, feature, tokenizer_name):
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        table = pq.read_table(make_cache_name(layer, revision))
        table_feat = table.filter(pc.field("feature") == feature).to_pandas()
        # freq_t = table_feat[["activation", "freq"]]
        total_freq = float(table_feat["freq"].sum()) * 100

        # Keep only rows where the feature actually fired
        table_feat = table_feat[table_feat["activation"] > 0]
        table_feat = table_feat[table_feat["freq"] > 0]
        table_feat = table_feat.sort_values("activation", ascending=False)

        # Decode a window of `nearby` tokens around each activating position
        texts = table_feat["token"].apply(
            lambda x: tokenizer.decode(
                token_table[max(0, x - nearby - 1):x + nearby + 1]["tokens"].to_numpy()
            )
        )
        texts = [tokenizer.tokenize(text) for text in texts]
        activations = table_feat["nearby"].to_numpy()

        if len(activations) > 0:
            # Dequantize and normalize activations to [0, 1] for highlighting
            activations = np.stack(activations) * stride
            max_act = table_feat["activation"].max()
            activations = activations / max_act

            highlight_data = [
                [(token, act) for token, act in zip(text, acts)] + [("\n", 0)]
                for text, acts in zip(texts, activations)
            ]
            flat_data = [item for sublist in highlight_data for item in sublist]

            # Color-map legend: label each normalized bin with its raw activation value
            color_map_data = [i / n_bins for i in range(n_bins + 1)]
            color_map_data = [(f"{i*max_act:.2f}", i) for i in color_map_data]
        else:
            flat_data = []
            color_map_data = []

        if feature in features[revision][layer]:
            autoi_expl = features[revision][layer][feature]["explanation"]
            selfe_expl = features[revision][layer][feature]["gen_explanations"]
            if selfe_expl is not None:
                selfe_expl = "\n".join(
                    f"{i+1}. \"{x}\"" for i, x in enumerate(selfe_expl)
                )
        else:
            autoi_expl = "No explanation found"
            selfe_expl = "No explanation found"

        return flat_data, color_map_data, total_freq, autoi_expl, selfe_expl
    # feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])

    # Typing a feature id re-renders the highlighted contexts, color map,
    # total frequency, and both explanation boxes
    feature_input.change(
        update,
        [revision_dropdown, layer_dropdown, feature_input, tokenizer_name],
        [frame, cm, frequency, autoi_expl, selfe_expl],
    )
if __name__ == "__main__":
    demo.launch(share=True)