Spaces:

IAUCourseExp
/

Tajrobiat_Bot

Running

App Files Files Community

IAUCourseExp committed 8 days ago

Commit

a42dd0b

verified ·

1 Parent(s): 32f1170

Update my_logic.py

Browse files

Files changed (1) hide show

my_logic.py +7 -22

my_logic.py CHANGED Viewed

@@ -9,27 +9,18 @@ import numpy as np
 from transformers import AutoTokenizer, AutoModel
-# Load CSV
-# Load FAISS index and metadata
 index = faiss.read_index("iau_reviews_index.faiss")
 with open("iau_metadata.json", "r", encoding="utf-8") as f:
     metadata = json.load(f)
 model = SentenceTransformer("HooshvareLab/bert-fa-zwnj-base")
-# Load reviews CSV
-# Load Persian tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-zwnj-base")
 model = AutoModel.from_pretrained("HooshvareLab/bert-fa-zwnj-base").eval()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
-# Load FAISS index and metadata
 index = faiss.read_index("iau_reviews_index.faiss")
 with open("iau_metadata.json", "r", encoding="utf-8") as f:
     metadata = json.load(f)
@@ -84,28 +75,26 @@ def filter_relevant(results, query):
     query_tokens = set(query.split())
     def is_strict_match(row):
-        # Normalize and tokenize professor and course
         prof_tokens = set(str(row["professor"]).strip().split())
         course_tokens = set(str(row["course"]).strip().split())
-        # Match only if full token overlap exists (not substrings)
         match_prof = prof_tokens & query_tokens
         match_course = course_tokens & query_tokens
         return bool(match_prof or match_course)
-    # Return all matching results
     return [r for r in results if is_strict_match(r)]
-# ---- Fuzzy similarity score ----
 def similar(a, b):
     return SequenceMatcher(None, a, b).ratio()
-# ---- Enhanced keyword fallback ----
 def keyword_match_reviews(query, metadata):
     query = query.strip().replace("؟", "")
     keywords = set(query.split())
@@ -120,7 +109,7 @@ def keyword_match_reviews(query, metadata):
                 break
     return results
-# ---- Sort by relevance ----
 def relevance_score(row, query):
     score = 0
     if row["professor"] in query:
@@ -133,7 +122,7 @@ def relevance_score(row, query):
         score += 1
     return score
-# ---- Strict context builder (best prof+course only) ----
 def build_strict_context(reviews, user_question):
     prof_match_scores = defaultdict(int)
     course_match_scores = defaultdict(int)
@@ -167,7 +156,7 @@ def build_strict_context(reviews, user_question):
         result += f"{i}. {r['comment'].strip()}\n🔗 لینک: {r['link']}\n\n"
     return result
-# ---- Truncation helper ----
 def truncate_reviews_to_fit(reviews, max_chars=127000):
     total = 0
     final = []
@@ -179,7 +168,6 @@ def truncate_reviews_to_fit(reviews, max_chars=127000):
         total += size
     return final
-# ---- Main answer function ----
 def answer_question(user_question, gemini_model):
     print(f"\n🧠 Starting debug for question: {user_question}")
@@ -210,7 +198,7 @@ def answer_question(user_question, gemini_model):
     print(f"✂️ After truncation: {len(retrieved)} rows")
     context = build_strict_context(retrieved, user_question)
-    print("📝 Sample context sent to GPT:\n", context[:100000], "\n...")
     prompt = f"""شما یک دستیار هوشمند انتخاب واحد هستید که فقط و فقط بر اساس نظرات واقعی دانشجویان از کانال @IAUCourseExp پاسخ می‌دهید. کار شما کمک به دانشجویان برای انتخاب استاد و درس، بر اساس تجربیات ثبت‌شده در این کانال است.
@@ -238,8 +226,5 @@ def answer_question(user_question, gemini_model):
 📘 پاسخ نهایی:
 """
-    # NEW (Gemini)
     response = gemini_model.generate_content(prompt)
     return response.text

 from transformers import AutoTokenizer, AutoModel
 # Load the FAISS index and its JSON metadata exactly once at import time.
 # (The file previously repeated this load a second time further down,
 # re-reading both files only to rebind the same two names.)
 index = faiss.read_index("iau_reviews_index.faiss")
 with open("iau_metadata.json", "r", encoding="utf-8") as f:
     metadata = json.load(f)
 # Load the Persian BERT tokenizer and encoder. The earlier
 # `model = SentenceTransformer(...)` assignment was dropped: `model` was
 # rebound to the AutoModel instance right away, so that first load only
 # cost start-up time and memory.
 tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-zwnj-base")
 model = AutoModel.from_pretrained("HooshvareLab/bert-fa-zwnj-base").eval()
 # Run the encoder on the GPU when one is available, otherwise on the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
     query_tokens = set(query.split())
     def is_strict_match(row):
         """Tell whether ``row`` shares a whole token with the query.

         The ``professor`` and ``course`` values of ``row`` are converted to
         strings and split on whitespace; the row matches when either token
         set has at least one element in common with ``query_tokens`` (the
         set built from the query in the enclosing scope).
         """
         # Both fields are read before any comparison is made.
         prof_words, course_words = (
             set(str(row[field]).strip().split())
             for field in ("professor", "course")
         )
         no_overlap = (
             prof_words.isdisjoint(query_tokens)
             and course_words.isdisjoint(query_tokens)
         )
         return not no_overlap
     return [r for r in results if is_strict_match(r)]
 def similar(a, b):
     """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``.

     The two sequences are compared with no junk filter (``None``).
     NOTE(review): ``SequenceMatcher`` is presumably ``difflib``'s; its
     import is not visible in this excerpt.
     """
     matcher = SequenceMatcher(None, a, b)
     return matcher.ratio()
 def keyword_match_reviews(query, metadata):
     query = query.strip().replace("؟", "")
     keywords = set(query.split())
                 break
     return results
 def relevance_score(row, query):
     score = 0
     if row["professor"] in query:
         score += 1
     return score
 def build_strict_context(reviews, user_question):
     prof_match_scores = defaultdict(int)
     course_match_scores = defaultdict(int)
         result += f"{i}. {r['comment'].strip()}\n🔗 لینک: {r['link']}\n\n"
     return result
 def truncate_reviews_to_fit(reviews, max_chars=127000):
     total = 0
     final = []
         total += size
     return final
 def answer_question(user_question, gemini_model):
     print(f"\n🧠 Starting debug for question: {user_question}")
     print(f"✂️ After truncation: {len(retrieved)} rows")
     context = build_strict_context(retrieved, user_question)
+    print("📝 Sample context sent to LLM:\n", context[:100000], "\n...")
     prompt = f"""شما یک دستیار هوشمند انتخاب واحد هستید که فقط و فقط بر اساس نظرات واقعی دانشجویان از کانال @IAUCourseExp پاسخ می‌دهید. کار شما کمک به دانشجویان برای انتخاب استاد و درس، بر اساس تجربیات ثبت‌شده در این کانال است.
 📘 پاسخ نهایی:
 """
     response = gemini_model.generate_content(prompt)
     return response.text