IAUCourseExp committed on
Commit
a42dd0b
·
verified ·
1 Parent(s): 32f1170

Update my_logic.py

Browse files
Files changed (1) hide show
  1. my_logic.py +7 -22
my_logic.py CHANGED
@@ -9,27 +9,18 @@ import numpy as np
9
  from transformers import AutoTokenizer, AutoModel
10
 
11
 
12
- # Load CSV
13
-
14
- # Load FAISS index and metadata
15
  index = faiss.read_index("iau_reviews_index.faiss")
16
  with open("iau_metadata.json", "r", encoding="utf-8") as f:
17
  metadata = json.load(f)
18
 
19
-
20
-
21
  model = SentenceTransformer("HooshvareLab/bert-fa-zwnj-base")
22
- # Load reviews CSV
23
 
24
-
25
- # Load Persian tokenizer and model
26
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-zwnj-base")
27
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-zwnj-base").eval()
28
 
29
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
  model.to(device)
31
 
32
- # Load FAISS index and metadata
33
  index = faiss.read_index("iau_reviews_index.faiss")
34
  with open("iau_metadata.json", "r", encoding="utf-8") as f:
35
  metadata = json.load(f)
@@ -84,28 +75,26 @@ def filter_relevant(results, query):
84
  query_tokens = set(query.split())
85
 
86
  def is_strict_match(row):
87
- # Normalize and tokenize professor and course
88
  prof_tokens = set(str(row["professor"]).strip().split())
89
  course_tokens = set(str(row["course"]).strip().split())
90
 
91
- # Match only if full token overlap exists (not substrings)
92
  match_prof = prof_tokens & query_tokens
93
  match_course = course_tokens & query_tokens
94
 
95
  return bool(match_prof or match_course)
96
 
97
- # Return all matching results
98
  return [r for r in results if is_strict_match(r)]
99
 
100
 
101
 
102
 
103
 
104
- # ---- Fuzzy similarity score ----
105
  def similar(a, b):
106
  return SequenceMatcher(None, a, b).ratio()
107
 
108
- # ---- Enhanced keyword fallback ----
109
  def keyword_match_reviews(query, metadata):
110
  query = query.strip().replace("؟", "")
111
  keywords = set(query.split())
@@ -120,7 +109,7 @@ def keyword_match_reviews(query, metadata):
120
  break
121
  return results
122
 
123
- # ---- Sort by relevance ----
124
  def relevance_score(row, query):
125
  score = 0
126
  if row["professor"] in query:
@@ -133,7 +122,7 @@ def relevance_score(row, query):
133
  score += 1
134
  return score
135
 
136
- # ---- Strict context builder (best prof+course only) ----
137
  def build_strict_context(reviews, user_question):
138
  prof_match_scores = defaultdict(int)
139
  course_match_scores = defaultdict(int)
@@ -167,7 +156,7 @@ def build_strict_context(reviews, user_question):
167
  result += f"{i}. {r['comment'].strip()}\n🔗 لینک: {r['link']}\n\n"
168
  return result
169
 
170
- # ---- Truncation helper ----
171
  def truncate_reviews_to_fit(reviews, max_chars=127000):
172
  total = 0
173
  final = []
@@ -179,7 +168,6 @@ def truncate_reviews_to_fit(reviews, max_chars=127000):
179
  total += size
180
  return final
181
 
182
- # ---- Main answer function ----
183
  def answer_question(user_question, gemini_model):
184
 
185
  print(f"\n🧠 Starting debug for question: {user_question}")
@@ -210,7 +198,7 @@ def answer_question(user_question, gemini_model):
210
  print(f"✂️ After truncation: {len(retrieved)} rows")
211
 
212
  context = build_strict_context(retrieved, user_question)
213
- print("📝 Sample context sent to GPT:\n", context[:100000], "\n...")
214
 
215
  prompt = f"""شما یک دستیار هوشمند انتخاب واحد هستید که فقط و فقط بر اساس نظرات واقعی دانشجویان از کانال @IAUCourseExp پاسخ می‌دهید. کار شما کمک به دانشجویان برای انتخاب استاد و درس، بر اساس تجربیات ثبت‌شده در این کانال است.
216
 
@@ -238,8 +226,5 @@ def answer_question(user_question, gemini_model):
238
  📘 پاسخ نهایی:
239
  """
240
 
241
-
242
- # NEW (Gemini)
243
-
244
  response = gemini_model.generate_content(prompt)
245
  return response.text
 
9
  from transformers import AutoTokenizer, AutoModel
10
 
11
 
 
 
 
12
  index = faiss.read_index("iau_reviews_index.faiss")
13
  with open("iau_metadata.json", "r", encoding="utf-8") as f:
14
  metadata = json.load(f)
15
 
 
 
16
  model = SentenceTransformer("HooshvareLab/bert-fa-zwnj-base")
 
17
 
 
 
18
  tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-zwnj-base")
19
  model = AutoModel.from_pretrained("HooshvareLab/bert-fa-zwnj-base").eval()
20
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
  model.to(device)
23
 
 
24
  index = faiss.read_index("iau_reviews_index.faiss")
25
  with open("iau_metadata.json", "r", encoding="utf-8") as f:
26
  metadata = json.load(f)
 
75
  query_tokens = set(query.split())
76
 
77
  def is_strict_match(row):
78
+
79
  prof_tokens = set(str(row["professor"]).strip().split())
80
  course_tokens = set(str(row["course"]).strip().split())
81
 
 
82
  match_prof = prof_tokens & query_tokens
83
  match_course = course_tokens & query_tokens
84
 
85
  return bool(match_prof or match_course)
86
 
 
87
  return [r for r in results if is_strict_match(r)]
88
 
89
 
90
 
91
 
92
 
93
+
94
  def similar(a, b):
95
  return SequenceMatcher(None, a, b).ratio()
96
 
97
+
98
  def keyword_match_reviews(query, metadata):
99
  query = query.strip().replace("؟", "")
100
  keywords = set(query.split())
 
109
  break
110
  return results
111
 
112
+
113
  def relevance_score(row, query):
114
  score = 0
115
  if row["professor"] in query:
 
122
  score += 1
123
  return score
124
 
125
+
126
  def build_strict_context(reviews, user_question):
127
  prof_match_scores = defaultdict(int)
128
  course_match_scores = defaultdict(int)
 
156
  result += f"{i}. {r['comment'].strip()}\n🔗 لینک: {r['link']}\n\n"
157
  return result
158
 
159
+
160
  def truncate_reviews_to_fit(reviews, max_chars=127000):
161
  total = 0
162
  final = []
 
168
  total += size
169
  return final
170
 
 
171
  def answer_question(user_question, gemini_model):
172
 
173
  print(f"\n🧠 Starting debug for question: {user_question}")
 
198
  print(f"✂️ After truncation: {len(retrieved)} rows")
199
 
200
  context = build_strict_context(retrieved, user_question)
201
+ print("📝 Sample context sent to LLM:\n", context[:100000], "\n...")
202
 
203
  prompt = f"""شما یک دستیار هوشمند انتخاب واحد هستید که فقط و فقط بر اساس نظرات واقعی دانشجویان از کانال @IAUCourseExp پاسخ می‌دهید. کار شما کمک به دانشجویان برای انتخاب استاد و درس، بر اساس تجربیات ثبت‌شده در این کانال است.
204
 
 
226
  📘 پاسخ نهایی:
227
  """
228
 
 
 
 
229
  response = gemini_model.generate_content(prompt)
230
  return response.text