from functools import lru_cache

import gradio as gr
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
def find_similar_items(stock_code):
    """Return the five items most similar to *stock_code*, with descriptions.

    Similarity is the cosine similarity between items' purchase vectors
    (one 0/1 entry per customer: did that customer buy the item in a
    positive net quantity).  The ranking is taken over all items, so the
    queried item itself typically appears in the result because its
    similarity to itself is the maximum.

    Args:
        stock_code: The StockCode to look up.  It is tried first as an
            integer (the original behaviour) and then as the given value
            with surrounding whitespace stripped, so codes stored as
            strings in the spreadsheet can be found as well.

    Returns:
        A string starting with a newline followed by a text table of
        StockCode / Description rows in descending similarity order, or a
        short "not found" message when the code is not in the data.
    """
    purchases, similarity = _load_item_similarity()

    key = _match_stock_code(stock_code, similarity.index)
    if key is None:
        return f"No item found for stock code {stock_code!r}."

    # Rank every item by similarity to the requested one and keep the top 5.
    top_5_similar_items = list(
        similarity.loc[key].sort_values(ascending=False).iloc[:5].index
    )

    # Attach descriptions; the final .loc restores the similarity ordering.
    is_top_item = purchases['StockCode'].isin(top_5_similar_items)
    results_df = (
        purchases.loc[is_top_item, ['StockCode', 'Description']]
        .drop_duplicates()
        .set_index('StockCode')
        .loc[top_5_similar_items]
    )
    return "\n" + results_df.to_string()


@lru_cache(maxsize=1)
def _load_item_similarity():
    """Read the retail data and build the item-to-item similarity matrix.

    The result is cached so the spreadsheet is parsed and the matrix built
    only once per process; restart the process to pick up a changed file.

    Returns:
        A ``(purchases, similarity)`` tuple: the rows of
        'Online_Retail.xlsx' that have a CustomerID, and a square DataFrame
        of cosine similarities indexed and labelled by StockCode.
    """
    # Rows without a CustomerID cannot be attributed to a customer.
    purchases = pd.read_excel('Online_Retail.xlsx').dropna(subset=['CustomerID'])

    # Customer x item matrix of summed quantities.
    customer_item = purchases.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )

    # 1 where the customer bought the item (net quantity > 0), else 0.
    # Missing cells compare as False and therefore also become 0.
    purchased = (customer_item > 0).astype(int)

    similarity = pd.DataFrame(
        cosine_similarity(purchased.T),
        index=purchased.columns,
        columns=purchased.columns,
    )
    return purchases, similarity


def _match_stock_code(stock_code, known_codes):
    """Return the form of *stock_code* present in *known_codes*, or None."""
    candidates = []
    try:
        candidates.append(int(stock_code))
    except (TypeError, ValueError):
        # Not numeric: fall through to the literal form below.
        pass
    if isinstance(stock_code, str):
        candidates.append(stock_code.strip())
    else:
        candidates.append(stock_code)

    for candidate in candidates:
        if candidate in known_codes:
            return candidate
    return None
# Build the Gradio UI: a single text box for the stock code and a text box
# that shows the recommended items, then start serving it.
stock_code_input = gr.inputs.Textbox(label="Enter Stock Code:")
output_table = gr.outputs.Textbox(label="Recommended Items")
interface = gr.Interface(
    fn=find_similar_items,
    inputs=stock_code_input,
    outputs=output_table,
)
interface.launch()