Spaces:

Navanihk
/

recommendationSystembackend

Sleeping

File size: 3,137 Bytes

import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import os
import difflib

from huggingface_hub import hf_hub_download

# Hugging Face Hub repository that every hf_hub_download call in this module
# reads from.
repo_id = "Navanihk/recommendationsystemmovie"

# Directory handed to hf_hub_download as its cache location. It is created at
# import time so it already exists before the first download is attempted.
cache_dir = "/tmp/hf_cache"
os.makedirs(name=cache_dir, exist_ok=True)
def load_data():
    """Return the movie table as a pandas DataFrame.

    The CSV ``movieswithposter_updated.csv`` is fetched from the Hub
    repository named by ``repo_id`` (cached under ``cache_dir``) and parsed
    with ``pd.read_csv``.

    If anything in that step raises, the error is printed and a copy of the
    same file in the current working directory is used instead. When no such
    local copy exists, the original exception is re-raised.
    """
    local_fallback = './movieswithposter_updated.csv'
    try:
        downloaded_csv = hf_hub_download(
            repo_id=repo_id,
            filename="movieswithposter_updated.csv",
            cache_dir=cache_dir,
        )
        return pd.read_csv(downloaded_csv)
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Without a local copy there is nothing left to try: propagate the
        # failure that brought us here.
        if not os.path.exists(local_fallback):
            raise
        return pd.read_csv(local_fallback)

# Movie table used for title matching and for the title/poster lookups.
movies_data = load_data()

# Local paths of the pickled model artifacts, downloaded (in this order) from
# the same Hub repository into the shared cache directory.
vectorizer_path, similarity_path = (
    hf_hub_download(repo_id=repo_id, filename=artifact_name, cache_dir=cache_dir)
    for artifact_name in ("model_vectorizer.pkl", "model_similarity.pkl")
)
# Unpickled similarity matrices keyed by file path, so the matrix is read from
# disk once per process instead of on every recommendation request.
_SIMILARITY_CACHE = {}


def _load_similarity(path):
    """Return the similarity matrix pickled at *path*, loading it at most once."""
    if path not in _SIMILARITY_CACHE:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only use artifacts from a repository you trust.
        with open(path, 'rb') as sim_file:
            _SIMILARITY_CACHE[path] = pickle.load(sim_file)
    return _SIMILARITY_CACHE[path]


def recommend_movies(movie_name, limit=35):
    """Return the movies most similar to the title closest to *movie_name*.

    The query is fuzzy-matched against ``movies_data['title']`` with
    ``difflib.get_close_matches``; the best match selects one row of the
    pickled similarity matrix, and movies are ranked by that row's scores in
    descending order (ties keep dataset order). The matched title itself is
    not filtered out of the result.

    Args:
        movie_name: Free-text movie title to look up.
        limit: Maximum number of recommendations to return (default 35, the
            value that was previously hard-coded).

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first,
        where ``image`` is the value of the row's ``poster`` column. The list
        is empty when *movie_name* is not a non-empty string, when *limit* is
        not positive, or when no title is close enough to *movie_name*.

    Side effects:
        Prints the ranked titles and their rounded scores to stdout.
    """
    if not isinstance(movie_name, str) or not movie_name or limit <= 0:
        return []

    # difflib only works on strings; skip missing titles (e.g. NaN) that
    # would otherwise make the matcher raise.
    known_titles = [t for t in movies_data['title'].tolist() if isinstance(t, str)]
    close_matches = difflib.get_close_matches(movie_name, known_titles)
    if not close_matches:
        # No usable match: an all-zero score vector would just return the
        # first rows of the dataset, which are not recommendations.
        return []
    best_title = close_matches[0]

    similarity = _load_similarity(similarity_path)

    print("Movies suggested for you based on your past choices: \n")

    # The 'index' column holds the movie's row number in the similarity matrix.
    matrix_row = movies_data[movies_data.title == best_title]['index'].values[0]
    scores = np.asarray(similarity[matrix_row], dtype=float)

    # Stable sort on the negated scores == descending order with ties kept in
    # dataset order, exactly like sorted(..., reverse=True) on (idx, score).
    top_indices = np.argsort(-scores, kind='stable')[:limit]

    movie_returns = []
    for rank, idx in enumerate(top_indices, start=1):
        # NOTE(review): result rows are looked up by DataFrame index label
        # while the query row uses the 'index' column - assumes both agree;
        # TODO confirm against the CSV.
        row = movies_data[movies_data.index == idx]
        title = row['title'].values[0]
        movie_returns.append({'title': title, 'image': row['poster'].values[0]})
        print(rank, '.', title, "(Score:", round(scores[idx], 2), ")")
    return movie_returns