aswin-10 committed on
Commit
ae4467e
·
verified ·
1 Parent(s): 153abd4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import openai
6
+ import os
7
+
8
# Read the OpenAI API key from the environment so it is never hard-coded here.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Load the course catalogue that the search index is built from.
df = pd.read_csv("course_data.csv")
13
+
14
+ # Combine relevant columns into one text representation for each course
15
def combine_course_text(row):
    """Build the text that represents one course for embedding.

    Joins the course title, description, chapter titles and lessons with
    single spaces, in that order.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series``) providing the
            columns 'Course Title', 'Description', 'All Chapter Titles' and
            'All Lessons'.

    Returns:
        The space-separated text of the fields that are present. Missing
        values (``None``/NaN, which pandas produces for empty CSV cells) are
        skipped instead of being rendered as the literal word "nan".

    Raises:
        KeyError: If one of the four columns is absent from ``row``.
    """
    columns = ('Course Title', 'Description', 'All Chapter Titles', 'All Lessons')
    values = (row[column] for column in columns)
    # Formatting each value individually keeps the output for complete rows
    # identical to a single f-string with the four fields.
    return " ".join(f"{value}" for value in values if not pd.isna(value))
17
+
18
# Attach the combined per-course text as a new column, computed row by row.
df['combined_text'] = df.apply(combine_course_text, axis="columns")
19
+
20
+ # Function to get embeddings for text using OpenAI's API
21
def get_embedding(text):
    """Request an embedding vector for ``text`` from the OpenAI API.

    Args:
        text: The text to embed.

    Returns:
        The embedding taken from the first item of the API response, or
        ``None`` if anything went wrong (the error is printed, not raised).
    """
    try:
        api_result = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=text,
        )
        return api_result['data'][0]['embedding']
    except Exception as err:
        # Best-effort by design: callers treat None as "no embedding".
        print(f"Error while getting embedding: {err}")
    return None
32
+
33
# Pre-compute embeddings for all courses in the dataset.
# course_titles and course_embeddings are built in lockstep so that row i of
# the embedding array always belongs to course_titles[i]. A course whose
# embedding could not be generated is left out of BOTH; otherwise every
# similarity index after the first failure would point at the wrong title.
course_embeddings = []
course_titles = []

for title, text in zip(df['Course Title'].tolist(), df['combined_text']):
    embedding = get_embedding(text)

    if embedding is None:
        print(f"Failed to generate embedding for: {text}")
        continue

    course_embeddings.append(embedding)
    course_titles.append(title)

# Convert embeddings to numpy array (for cosine_similarity to work properly)
course_embeddings = np.array(course_embeddings)
48
+
49
+ # Function to search courses based on a query
50
def search_courses(query):
    """Return the titles of the courses most similar to ``query``.

    The query is embedded with ``get_embedding`` and compared against the
    module-level ``course_embeddings`` using cosine similarity.

    Args:
        query: Free-text search string.

    Returns:
        A list of up to three course titles ordered from most to least
        similar; an empty list when no course embeddings are available; or
        the string "Error in generating query embedding." when the query
        itself could not be embedded.
    """
    query_embedding = get_embedding(query)

    if query_embedding is None:
        return "Error in generating query embedding."

    # With no indexed courses (empty CSV, or every embedding request failed)
    # the embedding array is not 2-D and cosine_similarity would raise.
    if len(course_embeddings) == 0:
        return []

    # One query row in, so take the single row of the similarity matrix.
    similarities = cosine_similarity([query_embedding], course_embeddings)[0]

    # argsort is ascending; reverse it to rank the best match first.
    ranked_indices = np.argsort(similarities)[::-1]

    return [course_titles[i] for i in ranked_indices[:3]]
67
+
68
+ # Gradio Interface
69
def gradio_search(query):
    """Gradio callback: format the top course matches for ``query`` as text.

    Args:
        query: The text typed by the user.

    Returns:
        A prompt to enter a query when ``query`` is empty or whitespace; the
        error message from ``search_courses`` when it reports one as a
        string; otherwise a three-line ranking, padded with "No results"
        when fewer than three courses were found.
    """
    # Previously a blank query fell through and returned None.
    if not query or not query.strip():
        return "Please enter a search query."

    results = search_courses(query)

    # search_courses signals failure with a plain string; pass it through
    # rather than indexing it character by character.
    if isinstance(results, str):
        return results

    top_three = list(results)[:3]
    top_three.extend(["No results"] * (3 - len(top_three)))

    # f-string formatting also copes with non-string titles (e.g. NaN).
    ranking = "\n".join(
        f"{position}. {title}" for position, title in enumerate(top_three, start=1)
    )
    return "Top relevant courses:\n" + ranking
75
+
76
# Wire the search callback into a simple text-in / text-out Gradio UI.
interface = gr.Interface(
    title="Smart Course Search",
    description="Enter a query and get the most relevant courses from the dataset.",
    fn=gradio_search,
    inputs="text",
    outputs="text",
)

# Start the app; share=True is passed through to Gradio's launch().
interface.launch(share=True)