Spaces:

Navanihk
/

recommendationSystembackend

Sleeping

recommendationSystembackend / recommendwithdesc.py

d818e73 14 days ago

2.84 kB

	import pickle
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np
	import pandas as pd
	import os
	from huggingface_hub import hf_hub_download
	import nltk
	# Hugging Face Hub repository that the dataset and pickled artefacts are downloaded from.
	repo_id = "Navanihk/recommendationsystemmovie"

	# Single cache directory shared by the hub downloads and the NLTK data files.
	cache_dir = '/tmp/hf_cache'
	os.makedirs(cache_dir, exist_ok=True)

	# Register the cache directory on NLTK's search path, then download the
	# tokenizer resources into it (same order as listed).
	nltk.data.path.append(cache_dir)
	for _nltk_resource in ('punkt', 'punkt_tab'):
	    nltk.download(_nltk_resource, download_dir=cache_dir)
	def load_data():
	    """Return the movie table as a pandas DataFrame.

	    The CSV is downloaded from the Hugging Face Hub repository ``repo_id``
	    into ``cache_dir`` and parsed. If the download or the parsing fails, the
	    error is printed and a copy of the CSV in the current working directory
	    is used instead.

	    Raises:
	        Exception: the original failure is re-raised unchanged when no local
	            fallback CSV exists.
	    """
	    try:
	        # Fetch the dataset from the hub and parse it in one step.
	        return pd.read_csv(
	            hf_hub_download(
	                repo_id=repo_id,
	                filename="movieswithposter_updated.csv",
	                cache_dir=cache_dir,
	            )
	        )
	    except Exception as err:
	        print(f"Error loading data from Hugging Face: {err}")
	        # Without a local copy there is nothing to fall back to: propagate
	        # the original error to the caller.
	        if not os.path.exists('./movieswithposter_updated.csv'):
	            raise
	        return pd.read_csv('./movieswithposter_updated.csv')

	def _fetch_from_hub(filename):
	    """Download *filename* from ``repo_id`` into ``cache_dir`` and return its local path."""
	    return hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)


	# Movie table used to turn row positions into titles and posters.
	movies_data = load_data()

	# Local paths of the pickled artefacts. ``feature_vector`` is only a path at
	# this point; it is unpickled later by the recommender.
	model_vectorizer = _fetch_from_hub("model_vectorizer.pkl")
	similarity_path = _fetch_from_hub("model_similarity.pkl")
	feature_vector = _fetch_from_hub("feature_vector.pkl")

	# NOTE(security): pickle.load can execute arbitrary code from the file, so
	# these loads are only safe while the hub repository above is trusted.
	with open(model_vectorizer, 'rb') as vec_file:
	    with open(similarity_path, 'rb') as sim_file:
	        vectorizer = pickle.load(vec_file)
	        similarity = pickle.load(sim_file)
	# Unpickled contents of ``feature_vector``; filled on first use so the file
	# is not re-read from disk on every recommendation request.
	_FEATURE_VECTORS = None


	def _load_feature_vectors():
	    """Return the per-movie feature matrix, unpickling it only on the first call."""
	    global _FEATURE_VECTORS
	    if _FEATURE_VECTORS is None:
	        # NOTE(security): pickle.load can execute arbitrary code; this is only
	        # safe while the repository the file was downloaded from is trusted.
	        with open(feature_vector, 'rb') as handle:
	            _FEATURE_VECTORS = pickle.load(handle)
	    return _FEATURE_VECTORS


	def recommend_movies_with_desc(query, top_n=35):
	    """Recommend the movies whose features best match a text description.

	    Args:
	        query: The description to match. Either a single string or an
	            iterable of strings. A single string is wrapped in a list
	            because ``vectorizer`` is presumably a scikit-learn text
	            vectorizer, whose ``transform`` rejects a bare string (TODO
	            confirm the pickled type).
	        top_n: Maximum number of recommendations to return. Defaults to 35,
	            the limit that used to be hard-coded. Values <= 0 give ``[]``.

	    Returns:
	        A list of ``{'title': ..., 'image': ...}`` dicts, best match first,
	        built from the ``title`` and ``poster`` columns of ``movies_data``.
	        Each recommendation is also printed with its rounded score.
	    """
	    documents = [query] if isinstance(query, str) else query

	    # Project the query into the same feature space as the movies.
	    query_vectors = vectorizer.transform(documents)

	    # Result has one row per movie and one column per query document.
	    sim = cosine_similarity(_load_feature_vectors(), query_vectors)

	    # Collapse to a single score per movie. With one query document this is
	    # exactly the flattened column; with several, flattening would misalign
	    # positions with movie rows, so the best score per movie is kept instead.
	    scores = sim.max(axis=1)

	    # Highest score first. The stable sort keeps ties in row order, matching
	    # a stable descending sort of (position, score) pairs.
	    ranked = np.argsort(-scores, kind="stable")[:max(top_n, 0)]

	    movie_recom = []
	    for rank, index in enumerate(ranked, start=1):
	        # movies_data comes from pd.read_csv with the default index, so the
	        # row position is the same as the index label.
	        row = movies_data.iloc[int(index)]
	        title = row['title']
	        movie_recom.append({'title': title, 'image': row['poster']})
	        print(rank, '.', title, "(Score:", round(scores[index], 2), ")")
	    return movie_recom