|
import pickle |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import numpy as np |
|
import pandas as pd |
|
import os |
|
from huggingface_hub import hf_hub_download |
|
import nltk |
|
# Hugging Face Hub repository that the dataset and model files are downloaded from.
repo_id = "Navanihk/recommendationsystemmovie"

# Scratch directory shared by the Hugging Face downloads and the NLTK resources.
cache_dir = '/tmp/hf_cache'
os.makedirs(cache_dir, exist_ok=True)

# Register the cache directory on NLTK's search path, then download the
# 'punkt' and 'punkt_tab' resources into it (in that order).
nltk.data.path.append(cache_dir)
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource, download_dir=cache_dir)
|
def load_data():
    """Load the movie catalogue CSV into a pandas DataFrame.

    The file ``movieswithposter_updated.csv`` is first requested from the
    Hugging Face repository ``repo_id`` (cached under ``cache_dir``). If any
    step of that fails, the error is printed and a copy of the CSV in the
    current working directory is read instead.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        Exception: the original download/parse error is re-raised when no
            local copy of the CSV exists to fall back on.
    """
    local_copy = './movieswithposter_updated.csv'
    try:
        return pd.read_csv(
            hf_hub_download(
                repo_id=repo_id,
                filename="movieswithposter_updated.csv",
                cache_dir=cache_dir,
            )
        )
    except Exception as e:
        print(f"Error loading data from Hugging Face: {e}")
        # Without a local copy there is nothing to fall back on, so propagate
        # the original failure unchanged.
        if not os.path.exists(local_copy):
            raise
        return pd.read_csv(local_copy)
|
|
|
|
|
# Movie catalogue; its rows are looked up by index when building recommendations.
movies_data = load_data()


def _fetch_artifact(filename):
    """Download ``filename`` from ``repo_id`` into ``cache_dir`` and return its local path."""
    return hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)


# Local paths of the pickled artifacts. ``feature_vector`` is only a path here;
# the file is opened later by the recommendation function.
model_vectorizer = _fetch_artifact("model_vectorizer.pkl")
similarity_path = _fetch_artifact("model_similarity.pkl")
feature_vector = _fetch_artifact("feature_vector.pkl")

# NOTE(security): pickle.load can execute arbitrary code, so these files must
# only ever come from a repository that is trusted.
with open(model_vectorizer, 'rb') as vectorizer_fh, \
        open(similarity_path, 'rb') as similarity_fh:
    vectorizer = pickle.load(vectorizer_fh)
    similarity = pickle.load(similarity_fh)
|
# Lazily populated by _load_feature_vectors() so the pickle is read only once.
_feature_vectors_cache = None


def _load_feature_vectors():
    """Return the unpickled contents of ``feature_vector``, reading the file once."""
    global _feature_vectors_cache
    if _feature_vectors_cache is None:
        # NOTE(security): pickle.load can execute arbitrary code; the file must
        # come from a trusted repository.
        with open(feature_vector, 'rb') as handle:
            _feature_vectors_cache = pickle.load(handle)
    return _feature_vectors_cache


def recommend_movies_with_desc(query, top_n=35):
    """Recommend movies whose stored features best match a text description.

    The query is vectorized with the module-level ``vectorizer``, compared by
    cosine similarity against the pickled feature vectors, and the
    best-scoring rows of ``movies_data`` are returned. Each recommendation is
    also printed with its rank and score rounded to two decimals.

    Args:
        query: The description to match. Either a single string or an
            iterable holding one string. A bare string is wrapped in a list
            because ``vectorizer`` is presumably a scikit-learn text
            vectorizer, which rejects plain strings (TODO confirm the
            pickled type). Only a single query document is supported: the
            scores are flattened and used directly as row indices.
        top_n: Maximum number of recommendations to return. Defaults to 35,
            the previously hard-coded limit. Values <= 0 yield an empty list.

    Returns:
        A list of ``{'title': ..., 'image': ...}`` dicts ordered from most to
        least similar, where ``image`` is taken from the ``poster`` column.
        Ranked indices that have no matching row in ``movies_data`` are
        skipped rather than raising.
    """
    if isinstance(query, str):
        query = [query]

    query_vector = vectorizer.transform(query)
    scores = cosine_similarity(_load_feature_vectors(), query_vector).flatten()

    # A stable argsort of the negated scores gives descending score order with
    # ties kept in ascending index order, the same ordering as
    # sorted(enumerate(scores), key=score, reverse=True).
    ranking = np.argsort(-scores, kind="stable")

    limit = max(int(top_n), 0)
    movie_recom = []
    for index in ranking:
        if len(movie_recom) >= limit:
            break

        row = movies_data[movies_data.index == index]
        if row.empty:
            # The feature matrix and the catalogue disagree on this row; skip
            # it instead of crashing on .values[0].
            continue

        title = row['title'].values[0]
        movie_recom.append({'title': title, 'image': row['poster'].values[0]})
        print(len(movie_recom), '.', title, "(Score:", round(scores[index], 2), ")")

    return movie_recom