"""Item-to-item recommendations from Online_Retail.xlsx, served via Gradio.

Customers are treated as binary "has purchased" vectors per StockCode; items
are compared with cosine similarity over those vectors.
"""

import functools

import gradio as gr
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Workbook holding the transactions (read relative to the working directory).
DATA_FILE = 'Online_Retail.xlsx'
# Number of most-similar StockCodes reported per query.
TOP_N = 5


@functools.lru_cache(maxsize=1)
def _load_recommender():
    """Build the item-item similarity matrix and the description lookup once.

    Reading the workbook and computing the full similarity matrix is by far
    the most expensive step, so the result is cached for the lifetime of the
    process instead of being recomputed on every request. Changes to the
    workbook are therefore only picked up after a restart.

    Returns:
        A ``(similarity, descriptions)`` tuple. ``similarity`` is a square
        DataFrame indexed and labelled by StockCode. ``descriptions`` holds
        the distinct (StockCode, Description) pairs, indexed by StockCode
        (a code appears more than once if it has several descriptions).
        Both objects are shared between calls and must not be mutated.
    """
    # Rows without a CustomerID cannot be attributed to a customer.
    purchases = pd.read_excel(DATA_FILE).dropna(subset=['CustomerID'])

    # CustomerID x StockCode matrix of summed quantities.
    customer_item = purchases.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )

    # 1 where the customer's summed quantity is positive, else 0. Missing
    # cells (NaN) compare False against 0 and therefore also become 0.
    purchased = (customer_item > 0).astype(int)

    similarity = pd.DataFrame(
        cosine_similarity(purchased.T),
        index=purchased.columns,
        columns=purchased.columns,
    )

    descriptions = (
        purchases[['StockCode', 'Description']]
        .drop_duplicates()
        .set_index('StockCode')
    )
    return similarity, descriptions


def _resolve_stock_code(stock_code, known_codes):
    """Return the label in *known_codes* that matches *stock_code*, or None.

    The integer interpretation is tried first (the historical behaviour),
    then the stripped text and its upper-cased form, so that non-numeric
    codes can be looked up as well.
    """
    candidates = []
    try:
        candidates.append(int(stock_code))
    except (TypeError, ValueError):
        # Not a pure number: fall through to the textual candidates.
        pass

    text = str(stock_code).strip()
    candidates.extend([text, text.upper()])

    for candidate in candidates:
        if candidate in known_codes:
            return candidate
    return None


def find_similar_items(stock_code) -> str:
    """Return a text table of the items most similar to *stock_code*.

    Args:
        stock_code: StockCode to look up, as typed by the user. Numeric
            input is matched as an integer; other input is matched as text.

    Returns:
        A multi-line string listing the ``TOP_N`` most similar StockCodes
        with their descriptions, or a short explanatory message when the
        input is empty or the code is not present in the data.
    """
    if stock_code is None or not str(stock_code).strip():
        return "Please enter a stock code."

    similarity, descriptions = _load_recommender()

    code = _resolve_stock_code(stock_code, similarity.index)
    if code is None:
        return (
            f"Stock code {str(stock_code).strip()!r} was not found among "
            f"the purchases in {DATA_FILE}."
        )

    # Highest similarity first; keep the TOP_N best-scoring StockCodes.
    top_codes = list(
        similarity.loc[code].sort_values(ascending=False).iloc[:TOP_N].index
    )

    # Look the codes up in ranking order.
    results = descriptions.loc[top_codes]

    lines = ("\n" + results.to_string()).split('\n')
    # Index 4 = leading blank line + two header lines (column names, index
    # name) + the first data row, so the separator lands after the first row.
    # NOTE(review): the first row is normally the queried item itself
    # (self-similarity is the maximum), which this visually sets apart from
    # the recommendations; the previous comment said "after the 3rd item" -
    # confirm the intended position.
    lines.insert(4, '-' * 50)
    return "\n".join(lines)


# Set up the interface. The gr.inputs / gr.outputs namespaces were removed in
# newer Gradio releases, so prefer the top-level component when it exists and
# fall back to the legacy namespaces otherwise.
if hasattr(gr, "Textbox"):
    stock_code_input = gr.Textbox(label="Enter Stock Code:")
    output_table = gr.Textbox(label="Recommended Items")
else:
    stock_code_input = gr.inputs.Textbox(label="Enter Stock Code:")
    output_table = gr.outputs.Textbox(label="Recommended Items")

demo = gr.Interface(
    fn=find_similar_items,
    inputs=stock_code_input,
    outputs=output_table,
)
demo.launch()