bpHigh committed on
Commit
0f1eb8b
·
1 Parent(s): 4fdeb0d

Add Gradio Blocks Interface and Complete the Space

Browse files
Files changed (1) hide show
  1. app.py +158 -1
app.py CHANGED
@@ -7,5 +7,162 @@ from pathlib import Path
7
  import matplotlib.pyplot as plt
8
  import gradio as gr
9
  from huggingface_hub import from_pretrained_keras
 
 
 
10
 
11
- model = from_pretrained_keras("bpHigh/Node2Vec_MovieLens")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import matplotlib.pyplot as plt
8
  import gradio as gr
9
  from huggingface_hub import from_pretrained_keras
10
+ from collections import defaultdict
11
+ import math
12
+ import networkx as nx
13
 
14
# Load the pretrained Node2Vec model from the Hugging Face Hub.
model = from_pretrained_keras("bpHigh/Node2Vec_MovieLens")

# Download the actual data from
# http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
movielens_data_file_url = "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
movielens_zipped_file = keras.utils.get_file("ml-latest-small.zip", movielens_data_file_url, extract=False)
keras_datasets_path = Path(movielens_zipped_file).parent
movielens_dir = keras_datasets_path / "ml-latest-small"

# Only extract the data the first time the script is run.
if not movielens_dir.exists():
    # Bound to `archive` (not `zip`) so the builtin `zip` is not shadowed
    # for the rest of the module.
    with ZipFile(movielens_zipped_file, "r") as archive:
        print("Extracting all the files now...")
        archive.extractall(path=keras_datasets_path)
        print("Done!")

# Read the movies CSV and turn each numeric id into a "movie_<id>" string.
movies = pd.read_csv(movielens_dir / "movies.csv")
movies["movieId"] = "movie_" + movies["movieId"].astype(str)

# Load the ratings, store the rating as a float, and apply the same
# "movie_<id>" naming so both frames share one key format.
ratings = pd.read_csv(movielens_dir / "ratings.csv")
ratings["rating"] = ratings["rating"].astype(float)
ratings["movieId"] = "movie_" + ratings["movieId"].astype(str)
42
+ # Implement two utility functions for the movies DataFrame.
43
def get_movie_title_by_id(movieId):
    """Return the title of the first row in `movies` whose movieId matches.

    Raises IndexError when no row matches.
    """
    matching_titles = movies.loc[movies.movieId == movieId, "title"]
    return matching_titles.tolist()[0]
45
+
46
+
47
def get_movie_id_by_title(title):
    """Return the movieId of the first row in `movies` whose title matches.

    Raises IndexError when no row matches.
    """
    matching_ids = movies.loc[movies.title == title, "movieId"]
    return matching_ids.tolist()[0]
49
+
50
# Count, over all users, how often each movie and each movie pair was rated
# at least `min_rating`.
min_rating = 5
pair_frequency = defaultdict(int)
item_frequency = defaultdict(int)

rated_movies = ratings[ratings.rating >= min_rating]
# One (userId, sub-frame) entry per user.
movies_grouped_by_users = list(rated_movies.groupby("userId"))
for _, user_ratings in movies_grouped_by_users:
    current_movies = list(user_ratings["movieId"])
    for position, first in enumerate(current_movies):
        item_frequency[first] += 1
        for second in current_movies[position + 1:]:
            # Order the two ids so (a, b) and (b, a) share a single key.
            key = (first, second) if first <= second else (second, first)
            pair_frequency[key] += 1

# Build the undirected movies graph. Edges are added in `pair_frequency`
# insertion order, and nodes are created implicitly by `add_edge`; that node
# order becomes the vocabulary order used to index the embedding matrix below.
min_weight = 10
D = math.log(sum(item_frequency.values()))

movies_graph = nx.Graph()
for (x, y), xy_frequency in pair_frequency.items():
    pmi = math.log(xy_frequency) - math.log(item_frequency[x]) - math.log(item_frequency[y]) + D
    weight = pmi * xy_frequency
    # Skip edges whose weight is below `min_weight`.
    if weight < min_weight:
        continue
    movies_graph.add_edge(x, y, weight=weight)

# Vocabulary: index 0 is the "NA" placeholder, followed by the graph nodes.
vocabulary = ["NA", *movies_graph.nodes]
vocabulary_lookup = {token: index for index, token in enumerate(vocabulary)}

# Weights of the model's "item_embeddings" layer, indexed by vocabulary position.
# NOTE(review): presumably the model was trained on this exact vocabulary order — confirm.
movie_embeddings = model.get_layer("item_embeddings").get_weights()[0]

# Titles offered in the UI dropdown.
movie_titles = movies["title"].tolist()
98
+
99
def find_related_movies(movie_title, k):
    """Return the `k` movies whose embeddings score highest against `movie_title`.

    Args:
        movie_title: A title present in the `movies` DataFrame.
        k: Number of related movies to return. Anything `int()` accepts is
            allowed; `None` (an empty number field) falls back to 5, and a
            value below 1 yields an empty result.

    Returns:
        A pandas DataFrame with a single 'Related Movies' column holding up to
        `k` titles, best match first.

    Raises:
        IndexError: If `movie_title` is not found in `movies`.
        KeyError: If the movie exists but is not part of the graph vocabulary,
            so there is no embedding row to query with.
    """
    # An untouched number field is delivered as None; int(None) would raise.
    k = 5 if k is None else int(k)
    if k < 1:
        return pd.DataFrame({'Related Movies': []})

    movieId = get_movie_id_by_title(movie_title)
    token_id = vocabulary_lookup.get(movieId)
    if token_id is None:
        # The dropdown lists every movie, but only movies that ended up with
        # at least one graph edge have a vocabulary entry.
        raise KeyError(
            f"No embedding available for {movie_title!r}: the movie is not in the movies graph."
        )

    query_embeddings = np.array([movie_embeddings[token_id]])

    # NOTE(review): l2_normalize without `axis` rescales each tensor by one
    # global norm, so this ranks by dot product rather than per-row cosine
    # similarity. Left unchanged to keep the existing ranking; confirm intent.
    similarities = tf.linalg.matmul(
        tf.math.l2_normalize(query_embeddings),
        tf.math.l2_normalize(movie_embeddings),
        transpose_b=True,
    )

    # Ask for one spare candidate so dropping the "NA" placeholder still
    # leaves k results, and never request more rows than exist.
    num_candidates = min(k + 1, len(movie_embeddings))
    _, indices = tf.math.top_k(similarities, num_candidates)

    related_movies = []
    for token in indices.numpy().tolist()[0]:
        if token == 0:
            # Index 0 is the "NA" placeholder; it has no title to look up.
            continue
        related_movies.append(get_movie_title_by_id(vocabulary[token]))
        if len(related_movies) == k:
            break

    return pd.DataFrame({'Related Movies': related_movies})
125
+
126
+
127
+
128
# Gradio Blocks UI: pick a movie and a count, then show the related movies.
with gr.Blocks() as demo:
    gr.Markdown("""
    <div>
    <h1 style='text-align: center'>Find Related Movies</h1>
    Choose the specific movie from the dropdown and see the top k related Movies

    Note: The dropdown menu provides movie options from the Movielens dataset.
    </div>
    """)

    with gr.Box():
        gr.Markdown(
        """
        ### Input
        #### Select a movie to find other related movies.
        """)

        inp1 = gr.Dropdown(movie_titles)
        gr.Markdown(
        """
        <br>
        """)
        gr.Markdown(
        """
        #### Number of related movies you wanna find?
        """)
        # Start with a usable value so clicking "Run" without touching the
        # field does not hand None to the callback.
        inp2 = gr.Number(value=5)
        btn = gr.Button("Run")

    with gr.Box():
        gr.Markdown(
        """
        ### Output
        #### Top K related movies.
        """)
        # Header matches the column name of the DataFrame the callback returns.
        df1 = gr.DataFrame(headers=["Related Movies"], datatype=["str"], interactive=False)

    btn.click(fn=find_related_movies, inputs=[inp1, inp2], outputs=df1)

demo.launch(debug=True)