Spaces:

Navanihk
/

recommendationSystembackend

Sleeping

File size: 2,842 Bytes

669d4ab
 
 
 
929d1c0
669d4ab
bc0ceb6
669d4ab
6f16ef5
929d1c0
bc0ceb6
 
d818e73
669d4ab
 
 
 
929d1c0
 
669d4ab
 
 
 
 
 
 
 
 
 
 
 
 
2650ad9
 
427981d
669d4ab
 
 
 
 
 
427981d
669d4ab

import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import os
from huggingface_hub import hf_hub_download
import nltk
# Hugging Face repo that the dataset and the pickled artifacts are downloaded from.
repo_id = "Navanihk/recommendationsystemmovie"

# A single scratch directory is used both as the hub download cache and as
# the NLTK data directory.
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)

# Register the directory with NLTK's search path, then download the 'punkt'
# and 'punkt_tab' resources into it (same order as before).
nltk.data.path.append(cache_dir)
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource, download_dir=cache_dir)
def load_data():
    """Return the movie catalogue as a pandas DataFrame.

    The CSV ``movieswithposter_updated.csv`` is downloaded from the Hugging
    Face repo ``repo_id`` (cached under ``cache_dir``) and parsed. If any
    step of that raises, the error is printed and a copy of the file in the
    current working directory is read instead.

    Raises:
        Exception: the original download/parse error is re-raised when no
            local copy of the CSV exists.
    """
    local_copy = './movieswithposter_updated.csv'
    try:
        return pd.read_csv(
            hf_hub_download(
                repo_id=repo_id,
                filename="movieswithposter_updated.csv",
                cache_dir=cache_dir,
            )
        )
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Nothing to fall back on: surface the original failure unchanged.
        if not os.path.exists(local_copy):
            raise
        return pd.read_csv(local_copy)

# Movie catalogue; recommend_movies_with_desc reads its 'title' and 'poster' columns.
movies_data = load_data()

# Local paths of the pickled artifacts fetched from the hub repo.
# NOTE: despite their names, `model_vectorizer` and `feature_vector` hold file
# paths, not loaded objects; `feature_vector` is opened later by
# recommend_movies_with_desc.
model_vectorizer = hf_hub_download(
    repo_id=repo_id, filename="model_vectorizer.pkl", cache_dir=cache_dir
)
similarity_path = hf_hub_download(
    repo_id=repo_id, filename="model_similarity.pkl", cache_dir=cache_dir
)
feature_vector = hf_hub_download(
    repo_id=repo_id, filename="feature_vector.pkl", cache_dir=cache_dir
)

# NOTE(review): pickle.load executes code embedded in the file, so this is
# only safe as long as the repo the files were downloaded from is trusted.
with open(model_vectorizer, 'rb') as vec_file:
    with open(similarity_path, 'rb') as sim_file:
        vectorizer = pickle.load(vec_file)
        similarity = pickle.load(sim_file)
# Filled on first use by _load_feature_vectors(); holds the unpickled movie
# feature matrix so it is not re-read from disk on every request.
_feature_vectors_cache = None


def _load_feature_vectors():
    """Return the movie feature matrix, unpickling ``feature_vector`` only once."""
    global _feature_vectors_cache
    if _feature_vectors_cache is None:
        # NOTE(review): pickle.load executes code embedded in the file; this is
        # only safe while the repo the file was downloaded from is trusted.
        with open(feature_vector, 'rb') as fh:
            _feature_vectors_cache = pickle.load(fh)
    return _feature_vectors_cache


def recommend_movies_with_desc(query, top_n=35):
    """Recommend the movies whose feature vectors are most similar to *query*.

    The query is vectorized with the module-level ``vectorizer`` and compared
    by cosine similarity against the stored movie feature vectors. Each
    recommendation is also printed with its rank and rounded score.

    Args:
        query: Either a single text string or an iterable of text documents.
            A bare string is wrapped in a list before vectorizing. When
            several documents are given, each movie is ranked by its best
            score across them.
        top_n: Maximum number of recommendations to return. Defaults to 35,
            the cutoff this function has always used. Values <= 0 yield an
            empty list.

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts, best match first,
        where ``image`` comes from the ``poster`` column of ``movies_data``.
    """
    if top_n <= 0:
        return []

    # Presumably `vectorizer` is a scikit-learn text vectorizer (it is loaded
    # from a pickle, so verify): those expect an iterable of documents and
    # reject a bare string.
    documents = [query] if isinstance(query, str) else query
    query_vectors = vectorizer.transform(documents)

    # cosine_similarity returns one row per movie and one column per query
    # document. Collapsing with max keeps exactly one score per movie; for a
    # single document this equals the flattened column used previously.
    sim = cosine_similarity(_load_feature_vectors(), query_vectors)
    scores = np.max(sim, axis=1)

    # Stable sort on the negated scores: highest score first, ties keep their
    # original order (same ordering as sorted(..., reverse=True)).
    top_indices = np.argsort(-scores, kind="stable")[:top_n]

    # Row i of the feature matrix is looked up as row i of movies_data.
    # NOTE(review): positional access assumes movies_data keeps the default
    # 0..n-1 index that pd.read_csv produces - confirm if it is ever reindexed.
    titles = movies_data['title'].values
    posters = movies_data['poster'].values

    movie_recom = []
    for rank, index in enumerate(top_indices, start=1):
        title = titles[index]
        movie_recom.append({'title': title, 'image': posters[index]})
        print(rank, '.', title, "(Score:", round(scores[index], 2), ")")
    return movie_recom