Create app.py
app.py
ADDED
@@ -0,0 +1,155 @@
import re

import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

MODEL_NAME = "valhalla/distilbart-mnli-12-1"
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def load_zero_shot_pipeline():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    return pipeline("zero-shot-classification", model=model, tokenizer=tokenizer, device=device)


zero_shot = load_zero_shot_pipeline()


# === Functions ===
# Defined before the Streamlit flow below so they already exist when the
# "Run Analysis" branch calls them.

def classify_empirical(text):
    """Zero-shot check: empirical study vs. theoretical paper."""
    candidate_labels = ["empirical study", "theoretical paper"]
    res = zero_shot(text, candidate_labels)
    top_label = res["labels"][0]
    top_score = res["scores"][0]
    if top_label == "empirical study" and top_score > 0.5:
        return "Yes"
    elif top_label == "theoretical paper" and top_score > 0.5:
        return "No"
    return "Unknown"


def find_constructs(text):
    """Rough proxy: the two most frequent words longer than five letters."""
    tokens = text.lower().split()
    freq = {}
    for w in tokens:
        if len(w) > 5 and w.isalpha():
            freq[w] = freq.get(w, 0) + 1
    sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    if not sorted_freq:
        return "Unknown"
    return ", ".join([x[0] for x in sorted_freq[:2]])


def extract_sample_details(text):
    """Regex for sample-size phrases plus simple keyword checks for sample type."""
    t = text.lower()
    pattern = r"(n\s*=\s*\d+|sample of \d+|\d+\s+participants|\d+\s+subjects)"
    matches = re.findall(pattern, t)
    info = "; ".join([m[0] if isinstance(m, tuple) else m for m in matches]) if matches else ""
    if "student" in t:
        info += "; students"
    if "employee" in t:
        info += "; employees"
    return info if info else "Unknown"


def guess_research_question(text):
    """Grab a short snippet around common research-question phrases."""
    lower = text.lower()
    if "effect of" in lower:
        idx = lower.index("effect of")
        snippet = text[idx: idx + 60]
        return f"Does {snippet}?"
    elif "aim of this study" in lower:
        idx = lower.index("aim of this study")
        snippet = text[idx: idx + 60]
        return snippet
    return "Unknown"


def guess_key_findings(text):
    """Grab a short snippet after common findings phrases."""
    lower = text.lower()
    if "we find that" in lower:
        idx = lower.index("we find that")
        return text[idx: idx + 100]
    elif "results show" in lower:
        idx = lower.index("results show")
        return text[idx: idx + 100]
    return "Unknown"


def identify_variables(text):
    """Look for an '(impact|influence|effect) of X on Y' pattern to extract IV and DV."""
    t = text.lower()
    pattern = r"(impact|influence|effect) of (\w+) on (\w+)"
    match = re.search(pattern, t)
    if match:
        iv = match.group(2)
        dv = match.group(3)
        return f"IV: {iv}, DV: {dv}", iv, dv
    return "Unknown", "Unknown", "Unknown"


def identify_unit_of_analysis(text):
    """Keyword-based guess at the level of analysis."""
    lower = text.lower()
    if "team" in lower or "groups" in lower:
        return "Team"
    if "organization" in lower or "firm" in lower:
        return "Organization"
    if any(x in lower for x in ["participant", "individual", "student", "employee"]):
        return "Individual"
    return "Unknown"


# === Streamlit app ===

st.title("ArticleInsight (Demo Offline Pipeline)")

st.markdown("""
**Upload a CSV** with an 'Abstract' column. We'll run a simple 8-step analysis:
1. Empirical Study?
2. Identify Construct
3. Sample Details
4. Main Research Question
5. Key Findings
6. Variables (IV/DV)
7. Antecedents, Outcomes
8. Unit of Analysis

**Disclaimer**: This is a *very naive* demonstration using zero-shot classification and simple regex.
It won't be super accurate, but it requires no coding from you!
""")

uploaded_file = st.file_uploader("Upload CSV with 'Abstract' column")

if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "Abstract" not in df.columns:
        st.error("CSV must have an 'Abstract' column.")
        st.stop()

    st.success("File uploaded successfully!")
    if st.button("Run Analysis"):
        with st.spinner("Analyzing each abstract..."):
            df["Empirical Study"] = ""
            df["Construct"] = ""
            df["Sample Details"] = ""
            df["Research Question"] = ""
            df["Key Findings"] = ""
            df["Variables"] = ""
            df["Antecedents"] = ""
            df["Outcomes"] = ""
            df["Unit of Analysis"] = ""

            for i, row in df.iterrows():
                abstract = str(row["Abstract"])
                df.at[i, "Empirical Study"] = classify_empirical(abstract)
                if df.at[i, "Empirical Study"] == "Yes":
                    df.at[i, "Construct"] = find_constructs(abstract)
                    df.at[i, "Sample Details"] = extract_sample_details(abstract)
                    df.at[i, "Research Question"] = guess_research_question(abstract)
                    df.at[i, "Key Findings"] = guess_key_findings(abstract)
                    var, ants, outs = identify_variables(abstract)
                    df.at[i, "Variables"] = var
                    df.at[i, "Antecedents"] = ants
                    df.at[i, "Outcomes"] = outs
                    df.at[i, "Unit of Analysis"] = identify_unit_of_analysis(abstract)
                else:
                    for col in ["Construct", "Sample Details", "Research Question", "Key Findings",
                                "Variables", "Antecedents", "Outcomes", "Unit of Analysis"]:
                        df.at[i, col] = "N/A"

        st.success("Done!")
        st.dataframe(df.head(50))
        csv_data = df.to_csv(index=False).encode("utf-8")
        st.download_button("Download Analyzed CSV", data=csv_data, file_name="analysis_output.csv", mime="text/csv")
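For reference, classify_empirical relies on the output shape of the Hugging Face zero-shot-classification pipeline: a dict with "sequence", "labels", and "scores", where labels come back sorted by descending score. Below is a minimal standalone sketch of that call, separate from app.py; the file name and example_abstract string are illustrative assumptions, and it presumes the model download succeeds.

# smoke_test.py -- hypothetical standalone check, not part of app.py
from transformers import pipeline

zero_shot = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1")

# Made-up abstract, used here only to exercise the pipeline.
example_abstract = "We surveyed 250 employees to examine the effect of autonomy on engagement."

res = zero_shot(example_abstract, ["empirical study", "theoretical paper"])

# The pipeline returns {'sequence': ..., 'labels': [...], 'scores': [...]},
# with labels sorted by descending score; classify_empirical() reads
# res["labels"][0] and res["scores"][0] from exactly this structure.
print(res["labels"][0], round(res["scores"][0], 3))

Locally, the app itself can be started with `streamlit run app.py` once streamlit, pandas, torch, and transformers are installed.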