rahideer committed on
Commit
0329a7f
·
verified ·
1 Parent(s): ce5f7ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -5,50 +5,53 @@ import faiss
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import pipeline
7
 
8
- # Load model for embeddings and QA generation
9
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
10
  generator = pipeline("text2text-generation", model="facebook/bart-large")
11
 
12
- # Load your climate news dataset (title + description)
13
  @st.cache_data
14
  def load_data():
15
-
 
 
16
  df["text"] = df["title"].fillna('') + ". " + df["description"].fillna('')
17
  return df
18
 
19
-
 
20
  corpus = df["text"].tolist()
21
  corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
22
 
23
- # Build FAISS index for fast similarity search
24
  index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
25
  index.add(corpus_embeddings.cpu().detach().numpy())
26
 
 
27
  st.title("🧠 Climate News Fact Checker")
 
28
 
29
- user_input = st.text_input("Enter a claim or statement to verify:")
 
30
 
31
  if user_input:
32
- # Embed the user query
33
  query_embedding = embedder.encode([user_input])
34
-
35
- # Search similar news articles
36
  top_k = 3
37
  D, I = index.search(query_embedding, top_k)
38
-
39
- # Get the top matched articles
40
  results = [corpus[i] for i in I[0]]
41
-
42
- # Display retrieved articles
43
- st.subheader("🔍 Retrieved News Snippets")
44
  for idx, res in enumerate(results):
45
  st.write(f"**Snippet {idx+1}:** {res}")
46
-
47
- # Combine into context for generation
48
  context = " ".join(results)
49
  prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"
50
-
51
- # Generate answer
52
  st.subheader("✅ Fact Check Result")
53
  response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
54
  st.write(response)
 
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import pipeline
7
 
8
# Load models for embeddings and generation.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the models are built inside st.cache_resource loaders: each
# one is constructed once per server process and the same instance is reused
# on later reruns instead of being reloaded for every submitted claim.
@st.cache_resource
def _load_embedder():
    """Return the shared SentenceTransformer used to embed articles and claims."""
    return SentenceTransformer('all-MiniLM-L6-v2')


@st.cache_resource
def _load_generator():
    """Return the shared text2text-generation pipeline that writes the verdict."""
    return pipeline("text2text-generation", model="facebook/bart-large")


# Module-level names are kept so the rest of the script can keep using them.
embedder = _load_embedder()
generator = _load_generator()
11
 
12
# Load and combine train + test datasets
@st.cache_data
def load_data():
    """Return train.csv and test.csv stacked into one DataFrame.

    The two files are read from the working directory and concatenated with a
    fresh 0..n-1 index. A "text" column is added that joins each row's
    "title" and "description" with ". "; missing values in either column are
    treated as empty strings.
    """
    frames = [pd.read_csv(path) for path in ("train.csv", "test.csv")]
    combined = pd.concat(frames, ignore_index=True)

    titles = combined["title"].fillna('')
    descriptions = combined["description"].fillna('')
    combined["text"] = titles + ". " + descriptions
    return combined
20
 
21
# Load the data
df = load_data()
corpus = df["text"].tolist()

# With no articles there is nothing to embed or search, and reading the
# embedding width below would fail; report the problem and end this run.
if not corpus:
    st.error("No articles were loaded from train.csv / test.csv.")
    st.stop()


@st.cache_resource
def _build_index(_texts):
    """Embed ``_texts`` and return ``(embeddings, index)``.

    ``embeddings`` is the tensor produced by the sentence embedder and
    ``index`` is a FAISS ``IndexFlatL2`` holding the same vectors in the same
    order, so a FAISS result id is a position in ``_texts``.

    The result is cached so the corpus is not re-encoded on every Streamlit
    rerun. The leading underscore tells Streamlit not to hash the argument,
    which means the cache is NOT refreshed if the CSV contents change; clear
    the cache or restart the app after replacing the data files.
    """
    embeddings = embedder.encode(_texts, convert_to_tensor=True)
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    # FAISS consumes NumPy arrays, so move the tensor to host memory first.
    flat_index.add(embeddings.cpu().detach().numpy())
    return embeddings, flat_index


# Build FAISS index for similarity search
corpus_embeddings, index = _build_index(corpus)
29
 
30
# App UI
st.title("🧠 Climate News Fact Checker")
st.markdown("Enter a **claim** to check if it's supported or refuted by recent climate-related news.")

# User input; surrounding whitespace is dropped so a blank-looking entry is
# not treated as a claim.
user_input = st.text_input("🔎 Enter a claim or statement:").strip()

if user_input:
    # Embed the input claim
    query_embedding = embedder.encode([user_input])

    # Retrieve top-k similar news snippets. Never ask for more neighbours
    # than the index holds: FAISS pads missing neighbours with the id -1,
    # and corpus[-1] would silently return the last article instead.
    top_k = min(3, index.ntotal)
    results = []
    if top_k > 0:
        _distances, neighbor_ids = index.search(query_embedding, top_k)
        results = [corpus[i] for i in neighbor_ids[0] if i >= 0]

    if not results:
        st.warning("No matching news snippets were found.")
    else:
        # Show retrieved snippets
        st.subheader("📄 Retrieved News Snippets")
        for number, snippet in enumerate(results, start=1):
            st.write(f"**Snippet {number}:** {snippet}")

        # Generate a response based on context
        context = " ".join(results)
        prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

        st.subheader("✅ Fact Check Result")
        # truncation=True clips an over-long prompt to the model's input
        # limit instead of letting generation fail on long articles.
        outputs = generator(
            prompt,
            max_length=100,
            do_sample=False,
            truncation=True,
        )
        response = outputs[0]['generated_text']
        st.write(response)