IAUCourseExp committed on
Commit
cb87184
·
verified ·
1 Parent(s): bd3c3a6

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +20 -64
  2. my_logic.py +114 -0
  3. requirements.txt +6 -1
app.py CHANGED
@@ -1,64 +1,20 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
-
62
-
63
- if __name__ == "__main__":
64
- demo.launch()
 
1
+ import gradio as gr
2
+ import google.generativeai as genai
3
+ from my_logic import answer_question
4
+ import os
5
+
6
# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# os.getenv returns None when the variable is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel("models/gemini-2.0-pro-exp-02-05")


def chatbot_interface(user_question):
    """Return the answer for *user_question* using the shared Gemini model.

    Thin adapter between the Gradio UI (one text input) and
    ``my_logic.answer_question``, which also needs the model instance.
    """
    return answer_question(user_question, gemini_model)


demo = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(
        lines=2,
        placeholder="مثلاً: برای مدار منطقی استاد شایگان چطوره؟",
        label="❓ سوال شما",
    ),
    outputs=gr.Textbox(label="📘 پاسخ"),
    title="🤖 ربات مشاور تجربیات انتخاب واحد",
    description="پاسخ بر اساس تجربیات واقعی دانشجویان از کانال @IAUCourseExp",
)

# Only start the server when run as a script, so importing this module
# (e.g. from a test or another tool) does not launch the UI as a side effect.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_logic.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from collections import defaultdict
3
+ from difflib import SequenceMatcher
4
+
5
+ # NOTE: You must define search_reviews, filter_relevant, metadata, etc.
6
+
7
def similar(a, b):
    """Return the difflib similarity ratio between *a* and *b*.

    The value is ``SequenceMatcher.ratio()``: a float in ``[0.0, 1.0]``
    where 1.0 means the two sequences are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
9
+
10
def keyword_match_reviews(query, metadata):
    """Return the rows of *metadata* whose professor or course matches *query*.

    The query is stripped, the Persian question mark is removed, and the
    remainder is split on whitespace into a set of keywords. A row is kept
    when any keyword is a substring of its ``"professor"`` or ``"course"``
    value (both converted with ``str``), or when ``similar`` rates the
    keyword against either value above 0.7. Each row appears at most once,
    in the order it occurs in *metadata*.
    """
    cleaned = query.strip().replace("؟", "")
    tokens = set(cleaned.split())

    def row_matches(row):
        professor = str(row["professor"])
        course_name = str(row["course"])
        return any(
            tok in professor
            or tok in course_name
            or similar(tok, professor) > 0.7
            or similar(tok, course_name) > 0.7
            for tok in tokens
        )

    return [row for row in metadata if row_matches(row)]
22
+
23
def relevance_score(row, query):
    """Score how strongly *row* relates to *query* (higher is more relevant).

    For each of the ``"professor"`` and ``"course"`` fields:

    * +2 when the full field value occurs in *query*;
    * +1 when the first whitespace-separated word of the value occurs
      in *query*.

    Field values are converted with ``str`` (as ``keyword_match_reviews``
    does) so non-string cells do not raise. Empty or whitespace-only values
    contribute nothing: without that guard ``"" in query`` would always be
    true and ``split()[0]`` would raise ``IndexError``.

    Args:
        row: Mapping with ``"professor"`` and ``"course"`` keys.
        query: The user's question text.

    Returns:
        An integer score between 0 and 6.
    """
    score = 0
    for field in ("professor", "course"):
        value = str(row[field])
        words = value.split()
        if not words:
            # Nothing meaningful to match on for this field.
            continue
        if value in query:
            score += 2
        if words[0] in query:
            score += 1
    return score
34
+
35
def build_strict_context(reviews, user_question):
    """Build the review context text that is placed into the model prompt.

    Steps:

    1. Compare *user_question* with every review's professor and course via
       ``similar``; values scoring above 0.6 accumulate their similarity.
    2. Pick the professor and course with the highest accumulated score
       (empty string when nothing passed the threshold).
    3. Keep only reviews whose professor/course is more than 0.85 similar
       to the chosen ones; when neither was identified, keep all reviews.
    4. Render a header line followed by one numbered entry per kept review,
       each with its stripped comment and its link.

    Args:
        reviews: Iterable of mappings with ``"professor"``, ``"course"``,
            ``"comment"`` and ``"link"`` keys.
        user_question: The user's question text.

    Returns:
        The formatted, newline-separated context string.
    """
    prof_match_scores = defaultdict(int)
    course_match_scores = defaultdict(int)
    for r in reviews:
        prof_sim = similar(user_question, r["professor"])
        course_sim = similar(user_question, r["course"])
        if prof_sim > 0.6:
            prof_match_scores[r["professor"]] += prof_sim
        if course_sim > 0.6:
            course_match_scores[r["course"]] += course_sim

    best_prof = max(prof_match_scores, key=prof_match_scores.get, default="")
    best_course = max(course_match_scores, key=course_match_scores.get, default="")

    if best_prof and best_course:
        filtered = [
            r for r in reviews
            if similar(best_prof, r["professor"]) > 0.85
            and similar(best_course, r["course"]) > 0.85
        ]
    elif best_course:
        filtered = [r for r in reviews if similar(best_course, r["course"]) > 0.85]
    elif best_prof:
        filtered = [r for r in reviews if similar(best_prof, r["professor"]) > 0.85]
    else:
        filtered = reviews

    # Use real newlines here: the doubled backslash in the previous version
    # emitted a literal backslash-n into the prompt text.
    header = f"👨‍🏫 استاد: {best_prof or '[نامشخص]'} — 📚 درس: {best_course or '[نامشخص]'}\n💬 نظرات:\n"
    entries = [
        f"{i}. {r['comment'].strip()}\n🔗 لینک: {r['link']}\n\n"
        for i, r in enumerate(filtered, 1)
    ]
    return header + "".join(entries)
62
+
63
def truncate_reviews_to_fit(reviews, max_chars=127000):
    """Return the leading reviews whose comments fit within *max_chars*.

    Reviews are taken in order while the running total of
    ``len(review["comment"])`` stays at or below *max_chars*. Iteration
    stops at the first review that would exceed the budget, so later
    reviews are dropped even if they are shorter.
    """
    kept = []
    used = 0
    for review in reviews:
        used += len(review["comment"])
        if used > max_chars:
            break
        kept.append(review)
    return kept
73
+
74
def answer_question(user_question, model):
    """Answer *user_question* from student reviews using *model*.

    Pipeline:

    1. Retrieve candidate reviews with ``search_reviews`` and narrow them
       with ``filter_relevant``.
    2. Add keyword matches from ``metadata`` whose link is not already
       present.
    3. Sort by ``relevance_score``, truncate to the character budget, and
       render the context with ``build_strict_context``.
    4. Send the assembled prompt to ``model.generate_content`` and return
       the response text.

    NOTE: ``search_reviews``, ``filter_relevant`` and ``metadata`` are not
    defined in this module as shown; they must be provided at module level
    before this function is called.

    Args:
        user_question: The user's question text.
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.

    Returns:
        The model's answer text, or a fixed "nothing found" message when no
        review was retrieved.
    """
    print(f"\n🧠 Starting debug for question: {user_question}")
    retrieved = search_reviews(user_question, top_k=100)
    print(f"🔍 FAISS returned {len(retrieved)} raw rows")
    retrieved = filter_relevant(retrieved, user_question)
    print(f"✅ After filter_relevant(): {len(retrieved)} rows")
    keyword_hits = keyword_match_reviews(user_question, metadata)
    print(f"🔠 Keyword hits found: {len(keyword_hits)}")
    existing_links = {r["link"] for r in retrieved}
    added = 0
    for r in keyword_hits:
        if r["link"] not in existing_links:
            # Remember the link so two keyword hits pointing at the same
            # review are not both appended.
            existing_links.add(r["link"])
            retrieved.append(r)
            added += 1
    print(f"➕ Added {added} unique fallback keyword rows")
    print(f"📊 Total before truncation: {len(retrieved)}")
    if not retrieved:
        return "❌ هیچ تجربه‌ای در مورد سوال شما در داده‌های کانال یافت نشد."
    retrieved.sort(key=lambda r: relevance_score(r, user_question), reverse=True)
    retrieved = truncate_reviews_to_fit(retrieved)
    print(f"✂️ After truncation: {len(retrieved)} rows")
    context = build_strict_context(retrieved, user_question)
    print("📝 Sample context sent to Gemini:\n", context[:1000], "\n...")
    # Plain triple-quoted f-string: the previous backslash-escaped quotes
    # were not valid Python.
    prompt = f"""شما یک دستیار هوشمند انتخاب واحد هستید که فقط و فقط بر اساس نظرات واقعی دانشجویان از کانال @IAUCourseExp پاسخ می‌دهید.
❗ قوانین مهم:
- فقط از داده‌های همین نظرات استفاده کن.
- اگر هیچ نظری نیست، بگو: «هیچ تجربه‌ای دربارهٔ این مورد در کانال ثبت نشده است.»
- سوالات ممکنه درباره یک استاد، درس، مقایسه، یا معرفی بهترین/بدترین‌ها باشه.
- همه نظرات رو تحلیل کن. لینک هر کدوم رو هم بیار.
- در پایان جمع‌بندی کن و بنویس:
📊 این پاسخ بر اساس بررسی {len(retrieved)} نظر دانشجویی نوشته شده است.
🔎 سوال دانشجو:
{user_question}

📄 نظرات دانشجویان:
{context}

📘 پاسخ نهایی:
"""
    response = model.generate_content(prompt)
    return response.text
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ gradio
4
+ google-generativeai
5
+ faiss-cpu
6
+ sentence-transformers