pratham0011 committed on
Commit
688dd55
·
verified ·
1 Parent(s): 2750504

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +212 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import gradio as gr
6
+ from groq import Groq
7
+ import os
8
+ from dotenv import load_dotenv
9
+
10
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# A timeout keeps a stalled connection from hanging start-up indefinitely, and
# raise_for_status() stops an HTTP error page from being parsed as a catalogue.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

courses = []

# Extracting course title, image, and course link
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    # The link is read from the closest <a> that precedes the card header.
    link_tag = course_card.find_previous('a')
    course_link = link_tag.get('href') if link_tag else None
    if not course_link:
        # Without an href there is nothing to link to; skipping also avoids
        # re-using the link of a previously processed card.
        continue
    if not course_link.startswith('http'):
        course_link = 'https://courses.analyticsvidhya.com' + course_link

    courses.append({
        'title': img_tag.get('alt'),
        'image_url': img_tag.get('src'),
        'course_link': course_link,
    })

# Step 2: Create DataFrame
# Naming the columns explicitly keeps df['title'] valid even when the scrape
# returned no courses at all.
df = pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])


# Read variables from a local .env file (if present) into the environment
# before the Groq client looks up GROQ_API_KEY.
load_dotenv()
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
43
+
44
def _parse_ranked_titles(text):
    """Return the ``(title, score)`` pairs found in the model's reply.

    Two layouts are recognised: the one requested in the prompt
    (``Title: ...`` followed by ``Relevance: ...`` on the next line) and the
    markdown variant ``**Title** (Relevance Score: 0.9)``. Scores must lie in
    the range 0 to 1; anything else is ignored.
    """
    # 0, 0.x, 1 or 1.0 -- the lookahead rejects values such as 1.5 or 10.
    score = r'(0(?:\.\d+)?|1(?:\.0+)?)(?!\.?\d)'
    patterns = (
        r'Title:\**[ \t]*(.+?)[ \t]*\r?\n[ \t]*\**Relevance(?: Score)?:\**[ \t]*' + score,
        r'\*\*(.+?)\*\*\s*\(Relevance Score:\s*' + score + r'\)',
    )

    pairs = []
    for pattern in patterns:
        for raw_title, raw_score in re.findall(pattern, text):
            name = raw_title.strip().strip('*').strip()
            # The model sometimes copies the "[Course Title]" placeholder style.
            if name.startswith('[') and name.endswith(']'):
                name = name[1:-1].strip()
            pairs.append((name, float(raw_score)))
    return pairs


def search_courses(query, courses_df=None, llm_client=None):
    """Rank the scraped courses against *query* using the Groq chat model.

    Args:
        query: Free-text search string entered by the user.
        courses_df: Optional DataFrame with ``title``, ``image_url`` and
            ``course_link`` columns. Defaults to the module-level ``df``.
        llm_client: Optional client exposing ``chat.completions.create``.
            Defaults to the module-level ``client``.

    Returns:
        A list of at most 10 dicts with the keys ``title``, ``image_url``,
        ``course_link`` and ``score``, sorted by descending score. Only
        courses scored 0.5 or higher are included. An empty list is returned
        for an empty query, an empty catalogue, or when any error occurs
        (the error is printed, not raised).
    """
    try:
        if not query or not query.strip():
            print("Empty query; skipping search")
            return []

        catalog = df if courses_df is None else courses_df
        groq_client = client if llm_client is None else llm_client

        print(f"Searching for: {query}")
        print(f"Number of courses in database: {len(catalog)}")
        if catalog.empty:
            # Nothing to rank, so do not spend an API call.
            return []

        # Prepare the prompt for Groq
        prompt = (
            f'Given the following query: "{query}"\n'
            "Please analyze the query and rank the following courses based on their relevance to the query.\n"
            "Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.\n"
            "Only return courses with a relevance score of 0.5 or higher.\n"
            "Return the results in the following format:\n"
            "Title: [Course Title]\n"
            "Relevance: [Score]\n"
            "\n"
            "Courses:\n"
            f"{catalog['title'].to_string(index=False)}\n"
        )

        print("Sending request to Groq...")
        # Get response from Groq
        response = groq_client.chat.completions.create(
            model="llama-3.2-1b-preview",
            messages=[
                {"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=1000,
        )
        print("Received response from Groq")

        reply = response.choices[0].message.content
        print("Groq response content:")
        print(reply)

        # Map each title the model returned back to a row of the catalogue.
        results = []
        seen_titles = set()
        for title, score in _parse_ranked_titles(reply):
            if score < 0.5 or not title:
                continue
            # regex=False: titles are literal text, so characters such as
            # "+" or "(" must not be interpreted as regex syntax.
            matching_courses = catalog[
                catalog['title'].str.contains(title[:30], case=False, na=False, regex=False)
            ]
            if matching_courses.empty:
                print(f"Warning: Course not found in database: {title}")
                continue

            course = matching_courses.iloc[0]
            if course['title'] in seen_titles:
                # The model listed the same course twice; keep the first hit.
                continue
            seen_titles.add(course['title'])
            results.append({
                'title': course['title'],  # Use the full title from the database
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                'score': score,
            })
            print(f"Added course: {course['title']}")

        print(f"Number of results found: {len(results)}")
        return sorted(results, key=lambda x: x['score'], reverse=True)[:10]  # Return top 10 results

    except Exception as e:
        # UI boundary: report the problem and let the caller show "no results".
        print(f"An error occurred in search_courses: {str(e)}")
        return []
106
+
107
def _render_course_card(item):
    """Build the HTML card for one search result dict."""
    from html import escape

    # Titles and URLs come from a scraped page, so escape them before putting
    # them into markup; a missing value becomes an empty string.
    course_title = escape(str(item['title'] or ''))
    course_image = escape(str(item['image_url'] or ''))
    course_link = escape(str(item['course_link'] or ''))
    relevance_score = round(item['score'] * 100, 2)

    return f'''
    <div class="course-card">
        <img src="{course_image}" alt="{course_title}" class="course-image"/>
        <div class="course-info">
            <h3>{course_title}</h3>
            <p>Relevance: {relevance_score}%</p>
            <a href="{course_link}" target="_blank" class="course-link">View Course</a>
        </div>
    </div>'''


def gradio_search(query):
    """Run a course search and return the results as an HTML fragment.

    Args:
        query: Search text from the Gradio textbox.

    Returns:
        A ``results-container`` div holding one card per result, or a
        ``no-results`` paragraph when the search returned nothing.
    """
    result_list = search_courses(query)
    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = ''.join(_render_course_card(item) for item in result_list)
    return f'<div class="results-container">{cards}</div>'
131
+
132
# Stylesheet handed to gr.Interface(css=...); it styles the HTML fragment that
# gradio_search() returns (result cards, links and the "no results" message).
custom_css = """
body {
    font-family: Arial, Helvetica, sans-serif;
    background-color: #f0f2f5;
}
.container {
    max-width: 600px;
    margin: 0 auto;
    padding: 20px;
}
.results-container {
    display: flex;
    flex-direction: column;
}
.course-card {
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    margin-bottom: 20px;
    overflow: hidden;
    width: 100%;
    transition: transform 0.2s;
}
.course-card:hover {
    transform: translateY(-5px);
}
.course-image {
    width: 100%;
    height: 200px;
    object-fit: cover;
}
.course-info {
    padding: 15px;
}
.course-info h3 {
    margin-top: 0;
    font-size: 18px;
    color: #333;
}
.course-info p {
    color: #666;
    font-size: 14px;
    margin-bottom: 10px;
}
.course-link {
    display: inline-block;
    background-color: #007bff;
    color: white;
    padding: 8px 12px;
    text-decoration: none;
    border-radius: 4px;
    font-size: 14px;
    transition: background-color 0.2s;
}
.course-link:hover {
    background-color: #0056b3;
}
.no-results {
    text-align: center;
    color: #666;
    font-style: italic;
}
"""
195
# Gradio interface: one textbox in, one HTML panel out, backed by gradio_search.
iface = gr.Interface(
    fn=gradio_search,
    inputs=gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., machine learning, data science, python",
    ),
    outputs=gr.HTML(label="Search Results"),
    title="Analytics Vidhya Smart Search Tool",
    description="Find the most relevant courses from Analytics Vidhya Website based on your query.",
    # NOTE(review): "huggingface" may not be a built-in theme name in the
    # pinned Gradio release -- confirm it resolves instead of falling back.
    theme="huggingface",
    css=custom_css,
    examples=[
        ["Tableau Course"],
        ["Machine Learning/Deep Learning with Python"],
        ["Business Analytics"],
    ],
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
gradio==4.44.1
requests==2.32.3
pandas==2.2.3
beautifulsoup4==4.12.3
groq==0.11.0
# app.py imports load_dotenv from the dotenv package
python-dotenv==1.0.1