Spaces:

Navanihk
/

recommendationSystembackend

Sleeping

App Files Files Community

recommendationSystembackend / recommend_normal.py

Navanihk

test

427981d 14 days ago

raw

history blame contribute delete

3.14 kB

	import pickle
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np
	import pandas as pd
	import os
	import difflib

	from huggingface_hub import hf_hub_download

	repo_id = "Navanihk/recommendationsystemmovie"
	cache_dir = '/tmp/hf_cache'
	os.makedirs(cache_dir, exist_ok=True)
	def load_data():
	try:

	# Download the CSV file
	csv_path = hf_hub_download(repo_id=repo_id, filename="movieswithposter_updated.csv", cache_dir=cache_dir)
	# Load as DataFrame
	movies_data = pd.read_csv(csv_path)
	return movies_data
	except Exception as e:
	print(f"Error loading data from Hugging Face: {e}")
	# Fallback to local file if available
	if os.path.exists('./movieswithposter_updated.csv'):
	return pd.read_csv('./movieswithposter_updated.csv')
	else:
	raise

	# Load movie data
	movies_data = load_data()
	vectorizer_path = hf_hub_download(repo_id=repo_id, filename="model_vectorizer.pkl", cache_dir=cache_dir)
	similarity_path = hf_hub_download(repo_id=repo_id, filename="model_similarity.pkl", cache_dir=cache_dir)
	def recommend_movies(movie_name):
	# Add the movie to the user's history
	if vectorizer_path and similarity_path:
	# Load the vectorizer and similarity matrix
	with open(vectorizer_path, 'rb') as vec_file, open(similarity_path, 'rb') as sim_file:
	vectorizer = pickle.load(vec_file)
	similarity = pickle.load(sim_file)

	print(f"Movies suggested for you based on your past choices: \n")

	# Create an aggregate similarity score across all movies in history
	combined_similarity = np.zeros(similarity.shape[0])

	for past_movie in [movie_name]:
	# Find a close match for each movie in the user's history
	list_of_all_titles = movies_data['title'].tolist()
	find_close_match = difflib.get_close_matches(past_movie, list_of_all_titles)

	if find_close_match:
	close_match = find_close_match[0]
	# Find the index of the movie in the dataset
	index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]
	# Accumulate the similarity scores
	combined_similarity += similarity[index_of_the_movie]

	# Sort movies based on the combined similarity score
	sorted_similar_movies = list(enumerate(combined_similarity))
	sorted_similar_movies = sorted(sorted_similar_movies, key=lambda x: x[1], reverse=True)

	# Recommend the top movies that the user hasn't already seen
	i = 1
	movie_returns = []
	for movie in sorted_similar_movies:
	index = movie[0]
	# title_from_index = movies_data[movies_data.index == index]['title'].values[0]
	dataFromtitle = movies_data[movies_data.index == index]
	movie_returns.append({'title':dataFromtitle['title'].values[0],'image':dataFromtitle['poster'].values[0]})
	print(i, '.',dataFromtitle['title'].values[0], "(Score:", round(movie[1], 2), ")")

	i+=1
	if i > 35: # Limit recommendations to top 5
	break
	return movie_returns