File size: 2,176 Bytes
87b43cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9063ba0
 
 
 
a942894
87b43cf
a942894
87b43cf
 
 
 
 
9063ba0
5423e57
 
 
ad0715b
87b43cf
 
 
a942894
87b43cf
a942894
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import functools

import gradio as gr
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Workbook holding the transactions; resolved relative to the working directory.
_DATA_PATH = 'Online_Retail.xlsx'
# Number of items reported per query, counting the queried item itself.
_TOP_N = 5
# Line printed between the queried item and its recommendations.
_SEPARATOR = '-' * 50


@functools.lru_cache(maxsize=1)
def _load_recommendation_data():
    """Build the item-to-item similarity data from the workbook, once per process.

    Reading the workbook and computing the similarity matrix is by far the
    most expensive part of a request, and neither depends on the queried
    stock code, so the result is memoized. Restart the process to pick up a
    changed workbook.

    Returns:
        A ``(similarity, labels_by_text, descriptions)`` tuple where

        * ``similarity`` is a square DataFrame of cosine similarities whose
          index and columns are both the StockCode labels,
        * ``labels_by_text`` maps ``str(label)`` to the StockCode label as it
          is stored in the matrix, so user-typed text can be matched whatever
          type the label was read as,
        * ``descriptions`` maps a StockCode label to the first non-empty
          Description recorded for it.
    """
    # Rows without a CustomerID cannot be attributed to a customer.
    transactions = pd.read_excel(_DATA_PATH).dropna(subset=['CustomerID'])

    # CustomerID x StockCode matrix of total purchased quantity.
    quantities = transactions.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )

    # 1 where the customer bought the item (net quantity > 0), else 0.
    # Missing cells are NaN, and ``NaN > 0`` is False, so they become 0.
    purchased = (quantities > 0).astype(int)

    similarity = pd.DataFrame(
        cosine_similarity(purchased.T),
        index=purchased.columns,
        columns=purchased.columns,
    )

    labels_by_text = {str(label): label for label in similarity.index}

    # A plain dict keeps lookups purely label-based, whatever the key type.
    descriptions = (
        transactions.dropna(subset=['Description'])
        .drop_duplicates(subset='StockCode')
        .set_index('StockCode')['Description']
        .to_dict()
    )
    return similarity, labels_by_text, descriptions


def find_similar_items(stock_code) -> str:
    """Describe the items most similar to *stock_code*, one per line.

    Similarity is the cosine similarity between items' customer purchase
    vectors. The first line describes the queried item, followed by a
    separator line and then the most similar other items in descending order
    of similarity (``_TOP_N`` items in total, including the queried one).

    Args:
        stock_code: StockCode to look up, as text or a number. Surrounding
            whitespace is ignored and non-numeric codes are accepted.

    Returns:
        The newline-joined descriptions, or a short explanatory message when
        the input is blank or the stock code is not present in the data.
    """
    query = '' if stock_code is None else str(stock_code).strip()
    if not query:
        # Answer before touching the workbook: nothing to look up.
        return "Please enter a stock code."

    similarity, labels_by_text, descriptions = _load_recommendation_data()

    label = labels_by_text.get(query)
    if label is None:
        return f"Stock code '{query}' was not found in the purchase history."

    # Rank the other items only; the queried item is placed first explicitly
    # so the separator below always follows it, even when another item ties
    # with it on similarity.
    neighbours = (
        similarity.loc[label]
        .drop(labels=label)
        .sort_values(ascending=False)
        .head(_TOP_N - 1)
    )
    ranked_codes = [label, *neighbours.index]

    # Fall back to the stock code itself when no description was recorded.
    lines = [str(descriptions.get(code, code)) for code in ranked_codes]
    lines.insert(1, _SEPARATOR)
    return "\n".join(lines)

# Set up the interface: one text box for the stock code, one for the results.
# Gradio 4 removed the legacy ``gr.inputs`` / ``gr.outputs`` namespaces in
# favour of top-level components, so prefer ``gr.Textbox`` and fall back to
# the legacy classes only on releases that do not provide it.
if hasattr(gr, "Textbox"):
    stock_code_input = gr.Textbox(label="Enter Stock Code:")
    output_table = gr.Textbox(label="Recommended Items")
else:
    stock_code_input = gr.inputs.Textbox(label="Enter Stock Code:")
    output_table = gr.outputs.Textbox(label="Recommended Items")

gr.Interface(fn=find_similar_items, inputs=stock_code_input, outputs=output_table).launch()