# recommendationSystembackend / recommend_normal.py
# Navanihk's picture
# test
# 427981d
import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import os
import difflib
from huggingface_hub import hf_hub_download
# Hugging Face Hub repository from which this module downloads the movie CSV
# and the pickled model files.
repo_id = "Navanihk/recommendationsystemmovie"
# Directory passed as `cache_dir` to every hf_hub_download call in this module.
cache_dir = '/tmp/hf_cache'
# Create the cache directory at import time; exist_ok=True makes this a no-op
# when the directory is already present.
os.makedirs(cache_dir, exist_ok=True)
def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The CSV is first fetched from the Hugging Face Hub repository named by
    ``repo_id`` (cached under ``cache_dir``).  If the download or the parse
    fails for any reason, the error is printed and a copy of the CSV in the
    current working directory is used instead.

    Raises:
        Exception: the original download/parse error is re-raised when no
            local copy of the CSV exists.
    """
    local_csv = './movieswithposter_updated.csv'
    try:
        # Download (or reuse the cached copy of) the CSV, then parse it.
        return pd.read_csv(
            hf_hub_download(
                repo_id=repo_id,
                filename="movieswithposter_updated.csv",
                cache_dir=cache_dir,
            )
        )
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        if not os.path.exists(local_csv):
            # No local copy either: propagate the failure to the caller.
            raise
        # Fall back to the CSV sitting next to the process.
        return pd.read_csv(local_csv)
# Load movie data
# This runs at import time: importing the module triggers the CSV download
# (or the local-file fallback implemented in load_data).
movies_data = load_data()
# Resolve local paths to the pickled vectorizer and similarity matrix;
# hf_hub_download is given the repo id, the file name and cache_dir.
# NOTE(review): unlike load_data, these two downloads have no try/except or
# local fallback, so a failure here raises during import — confirm intended.
vectorizer_path = hf_hub_download(repo_id=repo_id, filename="model_vectorizer.pkl", cache_dir=cache_dir)
similarity_path = hf_hub_download(repo_id=repo_id, filename="model_similarity.pkl", cache_dir=cache_dir)
# Unpickled similarity matrices keyed by file path, so the (potentially large)
# pickle is read from disk once per process instead of once per request.
_SIMILARITY_CACHE = {}


def _load_similarity(path):
    """Return the similarity matrix pickled at *path*, loading it only once."""
    if path not in _SIMILARITY_CACHE:
        # SECURITY: pickle.load executes code embedded in the file.  This is
        # only acceptable while the artifact comes from a trusted repository.
        with open(path, 'rb') as sim_file:
            _SIMILARITY_CACHE[path] = pickle.load(sim_file)
    return _SIMILARITY_CACHE[path]


def recommend_movies(movie_name, top_n=35):
    """Recommend the movies most similar to *movie_name*.

    The name is fuzzy-matched against the ``title`` column of ``movies_data``
    with ``difflib.get_close_matches``; the best match selects one row of the
    pickled similarity matrix, and the highest-scoring entries of that row are
    returned.  Each recommendation is also printed with its rounded score.

    The matched title itself is not filtered out (presumably it ranks first,
    since a movie is maximally similar to itself).

    Args:
        movie_name: Free-text movie title typed by the user.
        top_n: Maximum number of recommendations to return.  Defaults to 35,
            the limit that used to be hard-coded.

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts ordered from most to
        least similar, where ``image`` is the row's ``poster`` value.  The
        list is empty when *movie_name* is empty/None, when ``top_n`` is not
        positive, when the similarity file path is unavailable, or when no
        title is close enough to *movie_name*.
    """
    if not movie_name or top_n <= 0:
        return []
    if not similarity_path:
        return []

    # Only the similarity matrix is needed here; the vectorizer pickle used to
    # be loaded as well but was never used.
    similarity = _load_similarity(similarity_path)

    all_titles = movies_data['title'].tolist()
    close_matches = difflib.get_close_matches(movie_name, all_titles)
    if not close_matches:
        # Without a match every score would be zero and the "recommendations"
        # would just be the first rows of the catalogue; report nothing.
        return []
    close_match = close_matches[0]

    # NOTE(review): the matched movie is located through the 'index' column,
    # while result rows below are located through the DataFrame's own index.
    # This assumes both numberings agree — confirm against the dataset.
    index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]

    combined_similarity = np.zeros(similarity.shape[0])
    combined_similarity += similarity[index_of_the_movie]

    # Stable sort on the negated scores: descending score, ties kept in
    # ascending row order (same order as sorted(..., reverse=True)).
    top_indices = np.argsort(-combined_similarity, kind='stable')[:top_n]

    print("Movies suggested for you based on your past choices: \n")
    movie_returns = []
    for rank, index in enumerate(top_indices, start=1):
        row = movies_data[movies_data.index == index]
        title = row['title'].values[0]
        movie_returns.append({'title': title, 'image': row['poster'].values[0]})
        print(rank, '.', title, "(Score:", round(combined_similarity[index], 2), ")")
    return movie_returns