Guy24 committed on
Commit 95d29eb · 1 Parent(s): cbfd114

adding application

Files changed (1):
  1. app.py +46 -53
app.py CHANGED
@@ -226,58 +226,55 @@ def find_last_token_index(full_ids, word_ids):
 
 
 def analyse_word(model_name: str, extraction_template: str, word: str, patchscopes_template: str):
-    if PatchscopesRetriever is None:
-        return (
-            "<p style='color:red'>❌ Patchscopes library not found. Run:<br/>"
-            "<code>pip install git+https://github.com/schwartz-lab-NLP/Tokens2Words</code></p>"
-        )
-
-    model, tokenizer = get_model_and_tokenizer(model_name)
-
-    # Build extraction prompt (where hidden states will be collected)
-    extraction_prompt ="X"
-
-    # Identify last token position of the *word* inside the prompt IDs
-    word_token_ids = tokenizer.encode(word, add_special_tokens=False)
-
-    # Instantiate Patchscopes retriever
-    patch_retriever = PatchscopesRetriever(
-        model,
-        tokenizer,
-        extraction_prompt,
-        patchscopes_template,
-        prompt_target_placeholder="X",
-    )
-
-    # Run retrieval for the word across all layers (one pass)
-    retrieved_words = patch_retriever.get_hidden_states_and_retrieve_word(
-        word,
-        num_tokens_to_generate=len(tokenizer.tokenize(word)),
-    )[0]
-
-    # Build a table summarising which layers match
-    records = []
-    matches = 0
-    for layer_idx, ret_word in enumerate(retrieved_words):
-        match = ret_word.strip(" ") == word.strip(" ")
-        if match:
-            matches += 1
-        records.append({"Layer": layer_idx, "Retrieved": ret_word, "Match?": "✓" if match else ""})
-
-    df = pd.DataFrame(records)
-
-    def _style(row):
-        color = "background-color: lightgreen" if row["Match?"] else ""
-        return [color] * len(row)
-
-    html_table = df.style.apply(_style, axis=1).hide(axis="index").to_html(escape=False)
-
-    sub_tokens = tokenizer.convert_ids_to_tokens(word_token_ids)
-    top = (
-        f"<p><b>Sub‑word tokens:</b> {' , '.join(sub_tokens)}</p>"
-        f"<p><b>Total matched layers:</b> {matches} / {len(retrieved_words)}</p>"
-    )
-    return top + html_table
+    try:
+        model, tokenizer = get_model_and_tokenizer(model_name)
+
+        # Build extraction prompt (where hidden states will be collected)
+        extraction_prompt ="X"
+
+        # Identify last token position of the *word* inside the prompt IDs
+        word_token_ids = tokenizer.encode(word, add_special_tokens=False)
+
+        # Instantiate Patchscopes retriever
+        patch_retriever = PatchscopesRetriever(
+            model,
+            tokenizer,
+            extraction_prompt,
+            patchscopes_template,
+            prompt_target_placeholder="X",
+        )
+
+        # Run retrieval for the word across all layers (one pass)
+        retrieved_words = patch_retriever.get_hidden_states_and_retrieve_word(
+            word,
+            num_tokens_to_generate=len(tokenizer.tokenize(word)),
+        )[0]
+
+        # Build a table summarising which layers match
+        records = []
+        matches = 0
+        for layer_idx, ret_word in enumerate(retrieved_words):
+            match = ret_word.strip(" ") == word.strip(" ")
+            if match:
+                matches += 1
+            records.append({"Layer": layer_idx, "Retrieved": ret_word, "Match?": "✓" if match else ""})
+
+        df = pd.DataFrame(records)
+
+        def _style(row):
+            color = "background-color: lightgreen" if row["Match?"] else ""
+            return [color] * len(row)
+
+        html_table = df.style.apply(_style, axis=1).hide(axis="index").to_html(escape=False)
+
+        sub_tokens = tokenizer.convert_ids_to_tokens(word_token_ids)
+        top = (
+            f"<p><b>Sub‑word tokens:</b> {' , '.join(sub_tokens)}</p>"
+            f"<p><b>Total matched layers:</b> {matches} / {len(retrieved_words)}</p>"
+        )
+        return top + html_table
+    except Exception as e:
+        return f"<p style='color:red'>❌ Error: {str(e)}</p>"
 
 
 # ----------------------------- GRADIO UI -------------------------------
@@ -311,8 +308,4 @@ with gr.Blocks(theme="soft") as demo:
     )
 
 if __name__ == "__main__":
-    try:
-        demo.launch()
-    except Exception as e:
-        print(f"Error launching Gradio app: {e}")
-        raise
+    demo.launch()
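
Note: this commit drops the old PatchscopesRetriever import guard and the try/except around demo.launch(), and instead catches any runtime failure inside analyse_word itself, returning the exception text as an HTML string so it renders in the app rather than crashing the Space. Below is a minimal, hypothetical sketch of how such a function is typically wired into the Gradio Blocks UI; the component names are assumptions, since the actual UI section of app.py is unchanged and therefore not shown in this diff.

# Hypothetical wiring sketch (not part of this commit). Component names are
# assumptions; the real UI lives in the unchanged portion of app.py.
import gradio as gr

with gr.Blocks(theme="soft") as demo:
    model_name = gr.Textbox(label="Model name")
    extraction_template = gr.Textbox(label="Extraction template")
    word = gr.Textbox(label="Word to analyse")
    patchscopes_template = gr.Textbox(label="Patchscopes template")
    run_btn = gr.Button("Analyse")
    result = gr.HTML()

    # analyse_word now returns an HTML error string on failure instead of
    # raising, so any exception text appears directly in the result panel.
    run_btn.click(
        analyse_word,
        inputs=[model_name, extraction_template, word, patchscopes_template],
        outputs=result,
    )

if __name__ == "__main__":
    demo.launch()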