Spaces:

pratham0011
/

Smart_Search_Tool

Running

App Files Files Community

pratham0011 committed on Oct 4, 2024

Commit

688dd55

verified ·

1 Parent(s): 2750504

Upload 2 files

Browse files

Files changed (2) hide show

app.py +212 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,212 @@

+import re
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+import gradio as gr
+from groq import Groq
+import os
+from dotenv import load_dotenv
+# Step 1: Scrape the free courses from Analytics Vidhya
+url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
+response = requests.get(url)
+soup = BeautifulSoup(response.content, 'html.parser')
+courses = []
+# Extracting course title, image, and course link
+for course_card in soup.find_all('header', class_='course-card__img-container'):
+    img_tag = course_card.find('img', class_='course-card__img')
+    if img_tag:
+        title = img_tag.get('alt')
+        image_url = img_tag.get('src')
+        link_tag = course_card.find_previous('a')
+        if link_tag:
+            course_link = link_tag.get('href')
+            if not course_link.startswith('http'):
+                course_link = 'https://courses.analyticsvidhya.com' + course_link
+            courses.append({
+                'title': title,
+                'image_url': image_url,
+                'course_link': course_link
+            })
+# Step 2: Create DataFrame
+df = pd.DataFrame(courses)
+load_dotenv()
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+def search_courses(query):
+    try:
+        print(f"Searching for: {query}")
+        print(f"Number of courses in database: {len(df)}")
+        # Prepare the prompt for Groq
+        prompt = f"""Given the following query: "{query}"
+        Please analyze the query and rank the following courses based on their relevance to the query.
+        Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.
+        Only return courses with a relevance score of 0.5 or higher.
+        Return the results in the following format:
+        Title: [Course Title]
+        Relevance: [Score]
+        Courses:
+        {df['title'].to_string(index=False)}
+        """
+        print("Sending request to Groq...")
+        # Get response from Groq
+        response = client.chat.completions.create(
+            model="llama-3.2-1b-preview",
+            messages=[
+                {"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.2,
+            max_tokens=1000
+        )
+        print("Received response from Groq")
+        # Parse Groq's response
+        results = []
+        print("Groq response content:")
+        print(response.choices[0].message.content)
+        # Use regex to extract course titles and relevance scores
+        matches = re.findall(r'\*\*(.+?)\*\*\s*\(Relevance Score: (0\.\d+)\)', response.choices[0].message.content)
+        for title, score in matches:
+            title = title.strip()
+            score = float(score)
+            if score >= 0.5:
+                matching_courses = df[df['title'].str.contains(title[:30], case=False, na=False)]
+                if not matching_courses.empty:
+                    course = matching_courses.iloc[0]
+                    results.append({
+                        'title': course['title'],  # Use the full title from the database
+                        'image_url': course['image_url'],
+                        'course_link': course['course_link'],
+                        'score': score
+                    })
+                    print(f"Added course: {course['title']}")
+                else:
+                    print(f"Warning: Course not found in database: {title}")
+        print(f"Number of results found: {len(results)}")
+        return sorted(results, key=lambda x: x['score'], reverse=True)[:10]  # Return top 10 results
+    except Exception as e:
+        print(f"An error occurred in search_courses: {str(e)}")
+        return []
+def gradio_search(query):
+    result_list = search_courses(query)
+    if result_list:
+        html_output = '<div class="results-container">'
+        for item in result_list:
+            course_title = item['title']
+            course_image = item['image_url']
+            course_link = item['course_link']
+            relevance_score = round(item['score'] * 100, 2)
+            html_output += f'''
+            <div class="course-card">
+                <img src="{course_image}" alt="{course_title}" class="course-image"/>
+                <div class="course-info">
+                    <h3>{course_title}</h3>
+                    <p>Relevance: {relevance_score}%</p>
+                    <a href="{course_link}" target="_blank" class="course-link">View Course</a>
+                </div>
+            </div>'''
+        html_output += '</div>'
+        return html_output
+    else:
+        return '<p class="no-results">No results found. Please try a different query.</p>'
+# Stylesheet for the search UI. The .results-container, .course-card,
+# .course-image, .course-info, .course-link and .no-results rules match the
+# class names used in the HTML built by gradio_search.
+custom_css = """
+body {
+    font-family: Arial, Helvetica, sans-serif;
+    background-color: #f0f2f5;
+}
+.container {
+    max-width: 600px;
+    margin: 0 auto;
+    padding: 20px;
+}
+.results-container {
+    display: flex;
+    flex-direction: column;
+}
+.course-card {
+    background-color: white;
+    border-radius: 8px;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    margin-bottom: 20px;
+    overflow: hidden;
+    width: 100%;
+    transition: transform 0.2s;
+}
+.course-card:hover {
+    transform: translateY(-5px);
+}
+.course-image {
+    width: 100%;
+    height: 200px;
+    object-fit: cover;
+}
+.course-info {
+    padding: 15px;
+}
+.course-info h3 {
+    margin-top: 0;
+    font-size: 18px;
+    color: #333;
+}
+.course-info p {
+    color: #666;
+    font-size: 14px;
+    margin-bottom: 10px;
+}
+.course-link {
+    display: inline-block;
+    background-color: #007bff;
+    color: white;
+    padding: 8px 12px;
+    text-decoration: none;
+    border-radius: 4px;
+    font-size: 14px;
+    transition: background-color 0.2s;
+}
+.course-link:hover {
+    background-color: #0056b3;
+}
+.no-results {
+    text-align: center;
+    color: #666;
+    font-style: italic;
+}
+"""
+# Gradio interface
+iface = gr.Interface(
+    fn=gradio_search,
+    inputs=gr.Textbox(label="Enter your search query", placeholder="e.g., machine learning, data science, python"),
+    outputs=gr.HTML(label="Search Results"),
+    title="Analytics Vidhya Smart Search Tool",
+    description="Find the most relevant courses from Analytics Vidhya Website based on your query.",
+    theme="huggingface",
+    css=custom_css,
+    examples=[
+        ["Tableau Course"],
+        ["Machine Learning/Deep Learning with Python"],
+        ["Business Analytics"]
+    ],
+)
+if __name__ == "__main__":
+    iface.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio==4.44.1
+requests==2.32.3
+pandas==2.2.3
+beautifulsoup4==4.12.3
+groq==0.11.0
+# app.py does `from dotenv import load_dotenv`; that module ships in python-dotenv.
+python-dotenv==1.0.1