recommendationSystembackend / recommendwithdesc.py
Navanihk's picture
gg
d818e73
import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import os
from huggingface_hub import hf_hub_download
import nltk
# Hugging Face repository that hosts the movie CSV and the pickled model files.
repo_id = "Navanihk/recommendationsystemmovie"

# One local directory is shared by the Hugging Face downloads and the NLTK data.
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)

# Register the directory with NLTK first, then fetch the tokenizer resources
# into it so they can be found on NLTK's search path.
nltk.data.path.append(cache_dir)
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource, download_dir=cache_dir)
def load_data():
    """Load the movie catalogue as a pandas DataFrame.

    The CSV is fetched from the Hugging Face repo named by ``repo_id``. If
    anything in that path fails (download or parsing), the error is printed
    and a copy of the CSV in the current working directory is used instead.

    Returns:
        The DataFrame parsed from ``movieswithposter_updated.csv``.

    Raises:
        Exception: the original download/parse error is re-raised when no
            local copy of the CSV exists.
    """
    try:
        remote_csv = hf_hub_download(
            repo_id=repo_id,
            filename="movieswithposter_updated.csv",
            cache_dir=cache_dir,
        )
        return pd.read_csv(remote_csv)
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Without a local fallback there is nothing left to try: propagate
        # the error that was just reported.
        if not os.path.exists('./movieswithposter_updated.csv'):
            raise
        return pd.read_csv('./movieswithposter_updated.csv')
# Movie catalogue used to turn matrix row numbers back into titles/posters.
movies_data = load_data()

# Local paths of the pickled artifacts, downloaded (or served from the cache)
# in the same order as before: vectorizer, similarity matrix, feature vectors.
model_vectorizer, similarity_path, feature_vector = (
    hf_hub_download(repo_id=repo_id, filename=_artifact, cache_dir=cache_dir)
    for _artifact in (
        "model_vectorizer.pkl",
        "model_similarity.pkl",
        "feature_vector.pkl",
    )
)

# Only the vectorizer and the similarity matrix are unpickled here; the
# feature-vector file is opened later via its path.
with open(model_vectorizer, 'rb') as vec_file:
    vectorizer = pickle.load(vec_file)
with open(similarity_path, 'rb') as sim_file:
    similarity = pickle.load(sim_file)
# Filled in by _load_feature_vectors() on first use; None until then.
_feature_vectors_cache = None


def _load_feature_vectors():
    """Return the pickled movie feature matrix, reading it from disk only once."""
    global _feature_vectors_cache
    if _feature_vectors_cache is None:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. This artifact is downloaded from the Hugging Face repo
        # named by ``repo_id``; only load it if that repo is trusted.
        with open(feature_vector, 'rb') as feature:
            _feature_vectors_cache = pickle.load(feature)
    return _feature_vectors_cache


def recommend_movies_with_desc(query, top_n=35):
    """Recommend the movies whose feature vectors best match a text query.

    Args:
        query: A description string, or an iterable of description strings
            (the form the original code required). When several texts are
            given, each movie is scored by its best match among them.
        top_n: Maximum number of recommendations to return. Defaults to 35,
            the limit the original implementation hard-coded.

    Returns:
        A list of at most ``top_n`` dicts, best match first, each with the
        keys ``'title'`` and ``'image'`` (taken from the ``title`` and
        ``poster`` columns of ``movies_data``). Movies with equal scores keep
        their catalogue order.

    Raises:
        IndexError: if the feature matrix has more rows than ``movies_data``
            and one of the extra rows ranks in the top ``top_n``.
    """
    # The vectorizer is called with an iterable of documents; scikit-learn
    # text vectorizers reject a bare string, so wrap it. (Assumes
    # ``vectorizer`` is such a vectorizer -- confirm against the training
    # code.)
    if isinstance(query, str):
        query = [query]
    if top_n <= 0:
        return []

    # Transform the query with the same vectorizer that built the movie features.
    query_vectors = vectorizer.transform(query)
    feature_vectors = _load_feature_vectors()

    # Rows are movies, columns are query texts. Reducing over the columns gives
    # one score per movie; for a single query this equals the old
    # ``sim.flatten()``, and for several queries it avoids flat indices that
    # no longer correspond to movie rows.
    sim = np.asarray(cosine_similarity(feature_vectors, query_vectors))
    combined_similarity = sim.max(axis=1)

    # A stable sort on the negated scores gives the same order as
    # ``sorted(..., reverse=True)``: highest first, ties in original order.
    top_indices = np.argsort(-combined_similarity, kind='stable')[:top_n]

    # Assumes row i of the feature matrix describes row i of movies_data, as
    # the original lookup did. movies_data comes from pd.read_csv without an
    # index column, so positional access matches the old label-based mask
    # without scanning the whole frame for every hit.
    titles = movies_data['title']
    posters = movies_data['poster']

    movie_recom = []
    for rank, index in enumerate(top_indices, start=1):
        title = titles.iloc[index]
        movie_recom.append({'title': title, 'image': posters.iloc[index]})
        print(rank, '.', title, "(Score:", round(combined_similarity[index], 2), ")")
    return movie_recom