Navanihk commited on
Commit
669d4ab
·
1 Parent(s): f00d760
.gitignore ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .idea/
2
+ .vscode/
3
+ .venv*/
4
+ venv*/
5
+ __pycache__/
6
+ dist/
7
+ .coverage*
8
+ htmlcov/
9
+ .tox/
10
+ docs/_build/
11
+ *.pkl
12
+ *.pyc
13
+ __pycache__/
14
+ venv/
15
+ .env
16
+ instance/
17
+ *.sqlite3
18
+ .vercel
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.10.9 image
2
+ FROM python:3.10.9
3
+
4
+ # Copy the current directory contents into the container at .
5
+ COPY . .
6
+
7
+ # Set the working directory to /
8
+ WORKDIR /
9
+
10
+ # Install requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r /requirements.txt
12
+
13
+ # Start the FastAPI app on port 7860, the default port expected by Spaces
14
+ CMD ["python", "app.py"]
README copy.md ADDED
Binary file (178 Bytes). View file
 
recommend_normal.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+ import pandas as pd
5
+ import os
6
+ import difflib
7
+
8
+ from huggingface_hub import hf_hub_download
9
+
10
repo_id = "Navanihk/recommendationsystemmovie"


def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The CSV ``movieswithposter_updated.csv`` is downloaded from the Hugging
    Face Hub repository named by ``repo_id``.  If anything in that step
    raises, the error is printed and a copy of the CSV in the current
    directory is used instead; when no such copy exists the original
    exception is re-raised.
    """
    local_copy = './movieswithposter_updated.csv'
    try:
        # Download (or reuse the cached copy of) the CSV, then parse it.
        return pd.read_csv(
            hf_hub_download(repo_id=repo_id, filename="movieswithposter_updated.csv")
        )
    except Exception as exc:
        print(f"Error loading data from Hugging Face: {exc}")
        # Nothing to fall back to: let the caller see the original failure.
        if not os.path.exists(local_copy):
            raise
        return pd.read_csv(local_copy)
27
+
28
# Load the movie catalogue once at import time.
movies_data = load_data()

# Resolve local paths of the two pickled artifacts stored in the Hub repo
# (downloaded in this order: feature vectors first, similarity matrix second).
vectorizer_path, similarity_path = [
    hf_hub_download(repo_id=repo_id, filename=artifact_name)
    for artifact_name in ("feature_vector.pkl", "model_similarity.pkl")
]
32
def recommend_movies(movie_name, top_n=35):
    """Return the movies most similar to the closest title match for *movie_name*.

    The query is fuzzy-matched against the ``title`` column of ``movies_data``
    with ``difflib.get_close_matches``.  The matched movie's row of the pickled
    similarity matrix (``similarity_path``) is ranked in descending order and
    the first *top_n* entries are returned.  The matched movie itself is not
    filtered out of the result.

    Args:
        movie_name: Free-text movie title to look up.
        top_n: Maximum number of recommendations to return (default 35, the
            limit this function has always used).

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first.
        The list is empty when no catalogue title is close to *movie_name*;
        previously an all-zero score vector was ranked and unrelated movies
        were returned.

    Raises:
        FileNotFoundError: If no similarity-matrix path is available.
    """
    # Resolve the query first so a miss never pays for loading the matrix.
    list_of_all_titles = movies_data['title'].tolist()
    find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
    if not find_close_match:
        return []
    close_match = find_close_match[0]

    if not similarity_path:
        raise FileNotFoundError("model_similarity.pkl could not be located")

    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # acceptable because the artifact comes from the project's own Hub repo.
    # The vectorizer pickle is intentionally not loaded: nothing here uses it.
    with open(similarity_path, 'rb') as sim_file:
        similarity = pickle.load(sim_file)

    print("Movies suggested for you based on your past choices: \n")

    # The dataset's 'index' column holds the row number used by the matrix.
    index_of_the_movie = movies_data.loc[
        movies_data['title'] == close_match, 'index'
    ].values[0]
    combined_similarity = np.zeros(similarity.shape[0])
    combined_similarity += similarity[index_of_the_movie]

    # A stable sort on the negated scores reproduces the tie order of
    # sorted(..., reverse=True): equal scores stay in catalogue order.
    ranked = np.argsort(-combined_similarity, kind="stable")[:max(top_n, 0)]

    movie_returns = []
    for rank, index in enumerate(ranked, start=1):
        title = movies_data.at[index, 'title']
        movie_returns.append({'title': title, 'image': movies_data.at[index, 'poster']})
        print(rank, '.', title, "(Score:", round(combined_similarity[index], 2), ")")
    return movie_returns
75
+
recommendwithdesc.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from huggingface_hub import hf_hub_download
7
+
8
repo_id = "Navanihk/recommendationsystemmovie"


def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The CSV ``movieswithposter_updated.csv`` is downloaded from the Hugging
    Face Hub repository named by ``repo_id``.  If that fails, the error is
    printed and a copy of the CSV in the current directory is used instead.

    Returns:
        The parsed catalogue.

    Raises:
        Exception: The original download/parse error is re-raised when no
            local fallback file exists.
    """
    # Imported here because the import lines visible at the top of this file
    # do not include ``os``; without it the fallback below raised NameError
    # and hid the real download error.
    import os

    try:
        # Download the CSV file and load it as a DataFrame
        csv_path = hf_hub_download(repo_id=repo_id, filename="movieswithposter_updated.csv")
        return pd.read_csv(csv_path)
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Fallback to local file if available
        if os.path.exists('./movieswithposter_updated.csv'):
            return pd.read_csv('./movieswithposter_updated.csv')
        raise
25
+
26
# Load the movie catalogue once at import time.
movies_data = load_data()

# Fetch the pickled vectorizer and similarity matrix from the Hub repo
# (vectorizer first, similarity matrix second).
model_vectorizer, similarity_path = [
    hf_hub_download(repo_id=repo_id, filename=artifact_name)
    for artifact_name in ("model_vectorizer.pkl", "model_similarity.pkl")
]

# Both files are opened before either is unpickled.
# NOTE(review): `similarity` is not referenced by recommend_movies_with_desc
# in this file; it may only be kept for importers — confirm before removing.
with open(model_vectorizer, 'rb') as vec_file:
    with open(similarity_path, 'rb') as sim_file:
        vectorizer = pickle.load(vec_file)
        similarity = pickle.load(sim_file)
33
# Holds the unpickled movie feature matrix after its first use.
_feature_vector_cache = {}


def _load_feature_vectors():
    """Return the pickled movie feature matrix, reading it from disk only once."""
    if 'matrix' not in _feature_vector_cache:
        # Local import: the import lines visible in this file do not include os.
        import os

        # Prefer a copy in the working directory (the original behaviour) and
        # otherwise fetch the artifact of the same name from the Hub repo.
        feature_path = 'feature_vector.pkl'
        if not os.path.exists(feature_path):
            feature_path = hf_hub_download(repo_id=repo_id, filename="feature_vector.pkl")
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # artifacts produced by this project.
        with open(feature_path, 'rb') as feature:
            _feature_vector_cache['matrix'] = pickle.load(feature)
    return _feature_vector_cache['matrix']


def recommend_movies_with_desc(query, top_n=35):
    """Return the movies whose feature vectors are most similar to a text query.

    Args:
        query: A description string, or an iterable of description strings.
            A single string is wrapped in a list because scikit-learn text
            vectorizers expect an iterable of documents (presumably the
            pickled object is a TfidfVectorizer — confirm against the
            training script).  Scores of several strings are summed per movie.
        top_n: Maximum number of recommendations to return (default 35, the
            limit this function has always used).

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first.
        The list is empty when the query is empty or shares no vocabulary
        with any movie (every similarity is zero).
    """
    documents = [query] if isinstance(query, str) else list(query)
    if not documents:
        return []

    # Transform the query with the same vectorizer that produced the features
    query_vectors = vectorizer.transform(documents)
    feature_vectors = _load_feature_vectors()

    # One similarity column per query document; collapse to one score per movie.
    # For a single document this equals the flattened similarity column.
    sim = cosine_similarity(feature_vectors, query_vectors)
    combined_similarity = np.asarray(sim).sum(axis=1)
    if not combined_similarity.any():
        # Ranking an all-zero vector would just return the first rows.
        return []

    # A stable sort on the negated scores reproduces the tie order of
    # sorted(..., reverse=True): equal scores stay in catalogue order.
    ranked = np.argsort(-combined_similarity, kind="stable")[:max(top_n, 0)]

    movie_recom = []
    for rank, index in enumerate(ranked, start=1):
        title = movies_data.at[index, 'title']
        movie_recom.append({'title': title, 'image': movies_data.at[index, 'poster']})
        print(rank, '.', title, "(Score:", round(combined_similarity[index], 2), ")")
    return movie_recom
recommendwithhist.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import difflib
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import pickle
7
+ import os
8
+
9
+ import nltk
10
+ nltk.download('punkt')
11
+
12
+ from huggingface_hub import hf_hub_download
13
+ nltk.download('punkt_tab')
14
+ from nltk.stem import PorterStemmer
15
+ from nltk.tokenize import word_tokenize
16
repo_id = "Navanihk/recommendationsystemmovie"


def stemmed_tokenizer(text):
    """Split *text* with ``word_tokenize`` and return the Porter stem of each token."""
    stemmer = PorterStemmer()
    return list(map(stemmer.stem, word_tokenize(text)))
21
+ # Initialize an empty dictionary to store user history
22
user_history = {}


def save_user_history():
    """Pickle the module-level ``user_history`` object to ``user_history.pkl``.

    The file is written relative to the current working directory and is
    overwritten on every call.
    """
    with open('user_history.pkl', 'wb') as history_file:
        pickle.dump(user_history, history_file)
27
+
28
+ # Function to load user history from a pickle file
29
def load_user_history():
    """Replace the module-level ``user_history`` with the contents of ``user_history.pkl``.

    Does nothing when the file is absent from the current working directory,
    leaving ``user_history`` as it was.
    """
    global user_history
    if not os.path.exists('user_history.pkl'):
        return
    with open('user_history.pkl', 'rb') as history_file:
        user_history = pickle.load(history_file)
34
+
35
+ # Load movie data
36
+ # movies_data = pd.read_csv('./movieswithposter_updated.csv')
37
def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The CSV ``movieswithposter_updated.csv`` is downloaded from the Hugging
    Face Hub repository named by ``repo_id``.  If anything in that step
    raises, the error is printed and a copy of the CSV in the current
    directory is used instead; when no such copy exists the original
    exception is re-raised.
    """
    local_copy = './movieswithposter_updated.csv'
    try:
        # Download (or reuse the cached copy of) the CSV, then parse it.
        return pd.read_csv(
            hf_hub_download(repo_id=repo_id, filename="movieswithposter_updated.csv")
        )
    except Exception as exc:
        print(f"Error loading data from Hugging Face: {exc}")
        # Nothing to fall back to: let the caller see the original failure.
        if not os.path.exists(local_copy):
            raise
        return pd.read_csv(local_copy)
53
+
54
# Load movie data
movies_data = load_data()

# Pre-process data: missing text fields become empty strings so that the
# string concatenation below never produces NaN.
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
for feature in selected_features:
    movies_data[feature] = movies_data[feature].fillna('')

# Combine features into one space-separated document per movie
combined_features = movies_data['genres'] + ' ' + movies_data['keywords'] + ' ' + movies_data['tagline'] + ' ' + movies_data['cast'] + ' ' + movies_data['director']

# Locate the pre-trained artifacts.  hf_hub_download raises instead of
# returning a falsy value, so without this try/except the training fallback
# below could never run.
try:
    model_vectorizer = hf_hub_download(repo_id=repo_id, filename="model_vectorizer.pkl")
    similarity_path = hf_hub_download(repo_id=repo_id, filename="model_similarity.pkl")
except Exception as e:
    print(f"Error loading model from Hugging Face: {e}")
    # Reuse artifacts written by an earlier local training run when both exist.
    _local_model_files = ('model_vectorizer.pkl', 'model_similarity.pkl')
    if all(os.path.exists(path) for path in _local_model_files):
        model_vectorizer, similarity_path = _local_model_files
    else:
        model_vectorizer = similarity_path = None

# Check if the model (vectorizer and similarity) exists
if model_vectorizer and similarity_path:
    # Load the vectorizer and similarity matrix
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # artifacts produced by this project.
    with open(model_vectorizer, 'rb') as vec_file, open(similarity_path, 'rb') as sim_file:
        vectorizer = pickle.load(vec_file)
        similarity = pickle.load(sim_file)
else:
    # Train the model if it doesn't exist
    vectorizer = TfidfVectorizer(stop_words='english', tokenizer=stemmed_tokenizer)
    feature_vectors = vectorizer.fit_transform(combined_features)
    with open('feature_vector.pkl', 'wb') as file:
        pickle.dump(feature_vectors, file)
    # Calculate cosine similarity between every pair of movies
    similarity = cosine_similarity(feature_vectors)

    # Save the model (vectorizer and similarity matrix)
    with open('model_vectorizer.pkl', 'wb') as vec_file, open('model_similarity.pkl', 'wb') as sim_file:
        pickle.dump(vectorizer, vec_file)
        pickle.dump(similarity, sim_file)
85
+
86
+ # Function to recommend movies based on both user input and history
87
def recommend_movieswithhistory(user_id, movie_name, top_n=35):
    """Recommend movies from a user's whole search history, including *movie_name*.

    *movie_name* is first appended to the user's stored history.  Every
    history entry is fuzzy-matched against the catalogue titles, the matched
    movies' similarity rows are summed, and the highest-scoring movies the
    user has not already interacted with are returned.

    Args:
        user_id: Key identifying the user in the history store.
        movie_name: Free-text movie title for the current request.
        top_n: Maximum number of recommendations to return (default 35, the
            limit this function has always used).

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first.
        The list is empty when the history is empty or none of its entries
        resembles a catalogue title.
    """
    # Add the movie to the user's history
    add_to_history(user_id, movie_name)
    print(user_id, movie_name)
    # Fetch the user's history
    history = get_history(user_id)

    if not history:
        print("No history found for the user.")
        return []

    print(f"Movies suggested for you based on your past choices: {history}\n")

    # Create an aggregate similarity score across all movies in history
    combined_similarity = np.zeros(similarity.shape[0])
    list_of_all_titles = movies_data['title'].tolist()  # loop-invariant

    # Titles to hide from the result: the raw history entries plus the
    # catalogue titles they resolved to (user input may be misspelled).
    seen_titles = set(history)
    resolved = {}  # history entry -> matrix row (or None), so repeats match once
    matched_any = False
    for past_movie in history:
        if past_movie not in resolved:
            find_close_match = difflib.get_close_matches(past_movie, list_of_all_titles)
            if find_close_match:
                close_match = find_close_match[0]
                seen_titles.add(close_match)
                # The dataset's 'index' column holds the row number used by the matrix.
                resolved[past_movie] = movies_data.loc[
                    movies_data['title'] == close_match, 'index'
                ].values[0]
            else:
                resolved[past_movie] = None
        index_of_the_movie = resolved[past_movie]
        if index_of_the_movie is None:
            continue
        # Repeated history entries are accumulated again, as before.
        combined_similarity += similarity[index_of_the_movie]
        matched_any = True

    if not matched_any:
        # Ranking an all-zero vector would just return the first rows.
        return []

    # A stable sort on the negated scores reproduces the tie order of
    # sorted(..., reverse=True): equal scores stay in catalogue order.
    ranked = np.argsort(-combined_similarity, kind="stable")

    movie_return = []
    for index in ranked:
        if len(movie_return) >= top_n:
            break
        title = movies_data.at[index, 'title']
        if title in seen_titles:  # Don't recommend movies the user already interacted with
            continue
        print(len(movie_return) + 1, '.', title, "(Score:", round(combined_similarity[index], 2), ")")
        movie_return.append({'title': title, 'image': movies_data.at[index, 'poster']})
    return movie_return
136
+
137
+ # Function to add a movie to user history
138
def add_to_history(user_id, movie_title):
    """Append *movie_title* to the history list of *user_id* and persist the store.

    A new empty list is created for users not yet present in
    ``user_history``.  Duplicate titles are kept.
    """
    user_history.setdefault(user_id, []).append(movie_title)
    # Write the updated history to disk straight away.
    save_user_history()
143
+
144
+ # Function to get movies from user history
145
def get_history(user_id):
    """Return the stored history list for *user_id*, or a new empty list if none exists."""
    if user_id in user_history:
        return user_history[user_id]
    return []


# Restore any previously saved history as soon as the module is imported.
load_user_history()
150
+
151
+
requirements.txt ADDED
Binary file (1.32 kB). View file
 
vercel.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 2,
3
+ "builds": [
4
+ {
5
+ "src": "./main.py",
6
+ "use": "@vercel/python"
7
+ }
8
+ ],
9
+ "routes": [
10
+ {
11
+ "src": "/(.*)",
12
+ "dest": "/"
13
+ }
14
+ ]
15
+ }