|
import difflib
import functools
import os
import pickle

import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
# Hugging Face Hub repository that the dataset and model files are fetched from.
repo_id = "Navanihk/recommendationsystemmovie"

# Directory handed to hf_hub_download as its cache location. It is created at
# import time so it already exists before the first download is attempted.
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)
|
def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The file ``movieswithposter_updated.csv`` is requested from the Hugging
    Face Hub repository ``repo_id`` (using ``cache_dir`` as the download
    cache) and parsed with ``pd.read_csv``.

    If any step of that fails, the error is printed and the function falls
    back to ``./movieswithposter_updated.csv``, resolved against the current
    working directory.

    Raises:
        Exception: the original download/parse error is re-raised unchanged
            when no local fallback file exists.
    """
    local_copy = './movieswithposter_updated.csv'

    try:
        downloaded_csv = hf_hub_download(
            repo_id=repo_id,
            filename="movieswithposter_updated.csv",
            cache_dir=cache_dir,
        )
        return pd.read_csv(downloaded_csv)
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")

        # Without a local copy there is nothing left to try: surface the
        # original failure to the caller.
        if not os.path.exists(local_copy):
            raise
        return pd.read_csv(local_copy)
|
|
|
|
|
# The catalogue is loaded once at import time; recommend_movies reads it as a
# module-level global.
movies_data = load_data()

# Resolve the local paths of the two pickled model files, downloading them
# into cache_dir if needed. The generator is consumed in order, so the
# vectorizer is requested first and the similarity matrix second.
vectorizer_path, similarity_path = (
    hf_hub_download(repo_id=repo_id, filename=artifact, cache_dir=cache_dir)
    for artifact in ("model_vectorizer.pkl", "model_similarity.pkl")
)
|
@functools.lru_cache(maxsize=1)
def _load_similarity(path):
    """Unpickle the precomputed similarity matrix at *path*, memoising the result.

    The matrix is kept in memory after the first call so that later
    recommendations do not re-read and re-deserialise the file.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this at model artifacts from a source you trust.
    with open(path, 'rb') as sim_file:
        return pickle.load(sim_file)


def recommend_movies(movie_name, top_n=35):
    """Recommend movies similar to the catalogue title closest to *movie_name*.

    The title is fuzzy-matched against ``movies_data['title']`` with
    ``difflib.get_close_matches``; the best match's ``'index'`` column value
    selects a row of the pickled similarity matrix, and catalogue entries are
    ranked by that row's scores in descending order (ties keep ascending row
    order). The ranked list is also printed to stdout.

    Assumes row ``i`` of the similarity matrix corresponds both to the movie
    whose ``'index'`` column equals ``i`` and to DataFrame label ``i``
    (TODO confirm against how the matrix was built).

    Args:
        movie_name: Title (possibly misspelled) of a movie the user liked.
        top_n: Maximum number of recommendations to return. Defaults to 35.

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first,
        where ``image`` is the row's ``'poster'`` value. The list is empty
        when the similarity file path is unavailable or no catalogue title is
        close enough to *movie_name*.
    """
    if not similarity_path:
        return []

    similarity = _load_similarity(similarity_path)

    # Missing titles are dropped because difflib cannot compare against NaN.
    known_titles = movies_data['title'].dropna().tolist()
    close_matches = difflib.get_close_matches(movie_name, known_titles)
    if not close_matches:
        # Ranking an all-zero score vector would just return the first rows
        # of the catalogue, so report "no recommendations" instead.
        return []

    matched_title = close_matches[0]
    matched_index_values = movies_data.loc[
        movies_data['title'] == matched_title, 'index'
    ].values
    movie_row = int(matched_index_values[0])

    print("Movies suggested for you based on your past choices: \n")

    combined_similarity = np.zeros(similarity.shape[0])
    combined_similarity += similarity[movie_row]

    ranked = sorted(
        enumerate(combined_similarity),
        key=lambda pair: pair[1],
        reverse=True,
    )

    movie_returns = []
    for row_label, score in ranked:
        if len(movie_returns) >= top_n:
            break

        row = movies_data[movies_data.index == row_label]
        if row.empty:
            # The similarity matrix has a row with no catalogue entry; skip
            # it rather than failing on an empty selection.
            continue

        title = row['title'].values[0]
        movie_returns.append({'title': title, 'image': row['poster'].values[0]})
        print(len(movie_returns), '.', title, "(Score:", round(score, 2), ")")

    return movie_returns
|
|