"""Movie recommendations by text similarity.

At import time this module downloads the movie table, a pickled vectorizer,
a pickled similarity object and a pickled feature matrix from the Hugging
Face Hub repository named in ``repo_id``, then exposes
``recommend_movies_with_desc`` to rank movies against a free-text query.
"""
import os
import pickle

import nltk
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
from sklearn.metrics.pairwise import cosine_similarity

repo_id = "Navanihk/recommendationsystemmovie"
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)

# NLTK resources are stored in (and looked up from) the same cache directory.
# NOTE(review): presumably the pickled vectorizer tokenizes with NLTK's
# punkt models -- confirm against the training code.
nltk.data.path.append(cache_dir)
nltk.download('punkt', download_dir=cache_dir)
nltk.download('punkt_tab', download_dir=cache_dir)


def load_data():
    """Return the movie table as a DataFrame.

    The CSV is downloaded from the Hugging Face Hub repository ``repo_id``.
    If that fails for any reason, a copy named
    ``./movieswithposter_updated.csv`` in the working directory is used
    instead.

    Raises:
        Exception: the original download/parse error is re-raised when no
            local fallback file exists.
    """
    try:
        # Download the CSV file (served from cache_dir when already present).
        csv_path = hf_hub_download(
            repo_id=repo_id,
            filename="movieswithposter_updated.csv",
            cache_dir=cache_dir,
        )
        # Load as DataFrame
        return pd.read_csv(csv_path)
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Fallback to local file if available
        if os.path.exists('./movieswithposter_updated.csv'):
            return pd.read_csv('./movieswithposter_updated.csv')
        raise


# Load movie data
movies_data = load_data()

# Local paths of the pickled model artifacts.
model_vectorizer = hf_hub_download(repo_id=repo_id, filename="model_vectorizer.pkl", cache_dir=cache_dir)
similarity_path = hf_hub_download(repo_id=repo_id, filename="model_similarity.pkl", cache_dir=cache_dir)
feature_vector = hf_hub_download(repo_id=repo_id, filename="feature_vector.pkl", cache_dir=cache_dir)

# SECURITY: pickle.load executes arbitrary code embedded in the file. These
# artifacts come from a remote repository; only load them from a repo you
# trust.
with open(model_vectorizer, 'rb') as vec_file, open(similarity_path, 'rb') as sim_file:
    vectorizer = pickle.load(vec_file)
    similarity = pickle.load(sim_file)

# Lazily populated by _load_feature_vectors() so the matrix is unpickled once
# rather than on every recommendation request.
_feature_vectors_cache = None


def _load_feature_vectors():
    """Return the pickled movie feature matrix, reading it only on first use."""
    global _feature_vectors_cache
    if _feature_vectors_cache is None:
        # SECURITY: see the note above about unpickling downloaded files.
        with open(feature_vector, 'rb') as feature:
            _feature_vectors_cache = pickle.load(feature)
    return _feature_vectors_cache


def recommend_movies_with_desc(query, top_n=35):
    """Rank movies by cosine similarity to a text query.

    Args:
        query: The query text. Either a single string or an iterable holding
            one string (the form ``vectorizer.transform`` expects).
        top_n: Maximum number of recommendations to return. Defaults to 35,
            the limit that was previously hard-coded.

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first,
        taken from the ``title`` and ``poster`` columns of ``movies_data``.

    Side effects:
        Prints each recommendation with its rank and rounded score.
    """
    # A bare string must be wrapped: the vectorizer expects an iterable of
    # documents, not a single document.
    documents = [query] if isinstance(query, str) else query

    # Transform the query into a feature vector using the same vectorizer
    query_vector = vectorizer.transform(documents)

    # One row per movie, one column per query document.
    # NOTE(review): with more than one query document the flattened scores no
    # longer line up with movie rows; callers should pass a single document.
    scores = cosine_similarity(_load_feature_vectors(), query_vector).ravel()

    # sorted() is stable, so equally scored movies keep their table order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    ranked = ranked[:max(top_n, 0)]

    # Row i of the feature matrix is looked up positionally; read_csv gives
    # movies_data a default 0..n-1 index, so position and label coincide.
    titles = movies_data['title']
    posters = movies_data['poster']

    movie_recom = []
    for rank, (index, score) in enumerate(ranked, start=1):
        title = titles.iat[index]
        movie_recom.append({'title': title, 'image': posters.iat[index]})
        print(rank, '.', title, "(Score:", round(score, 2), ")")
    return movie_recom