Ahmud committed on
Commit
3c92b7e
·
verified ·
1 Parent(s): 1ca2d7f

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +565 -0
  2. embeddings_metadata.pkl +3 -0
  3. packages.txt +2 -0
  4. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from PIL import Image, UnidentifiedImageError
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ import os
9
+ from pdf2image import convert_from_path
10
+ from streamlit_cropper import st_cropper
11
+ import easyocr
12
+ from reportlab.lib.pagesizes import letter
13
+ from reportlab.pdfgen import canvas
14
+ from reportlab.lib.utils import ImageReader
15
+ import io
16
+ import base64
17
+
18
# -------------------
# Set page config (must be done before other elements)
# -------------------
st.set_page_config(
    page_title="Mobica Find",
)

# Inject custom CSS to force a black background
st.markdown(
    """
    <style>
    .stApp {
        background-color: black;
        color: white; /* Ensures your text is visible on black background */
    }
    </style>
    """,
    unsafe_allow_html=True
)


# ---------------
# Inject top-left logo
# ---------------
def _find_logo_path():
    """Return the first logo file that exists on disk, or None.

    The original development path is tried first so existing setups keep
    working; a ``logo_mobica.png`` sitting next to this script is the
    fallback for any machine where that drive does not exist.
    """
    candidates = (
        r"E:\Mobica\pdf_parser\logo_mobica.png",
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "logo_mobica.png"),
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None


logo_path = _find_logo_path()
if logo_path is not None:
    with open(logo_path, "rb") as f:
        logo_bytes = f.read()
    # The image is inlined as a base64 data URI so no static file serving is needed.
    encoded_logo = base64.b64encode(logo_bytes).decode()

    st.markdown(
        f"""
        <style>
        .top-left-logo {{
            position: fixed;
            top: 1rem;
            left: 1rem;
            z-index: 9999;
        }}
        </style>
        <div class="top-left-logo">
            <img src="data:image/png;base64,{encoded_logo}" width="240">
        </div>
        """,
        unsafe_allow_html=True
    )
# A missing logo is purely cosmetic, so the app continues without it instead
# of failing on open().
62
+
63
+ # --------------------
64
+ # Load Processor, Model, and Metadata
65
+ # --------------------
66
@st.cache_resource()
def load_resources():
    """Download the ALIGN processor and model from the Hugging Face hub.

    Returns:
        A ``(processor, model)`` tuple. The model is moved to CUDA when
        ``torch.cuda.is_available()`` reports a device, otherwise to the CPU.
    """
    # NOTE(review): AutoProcessor and AlignModel are not imported anywhere in
    # the visible part of this file, so this call fails with NameError unless
    # they are brought into scope elsewhere - confirm before relying on it.
    checkpoint = "kakaobrain/align-base"

    hub_processor = AutoProcessor.from_pretrained(checkpoint)
    hub_model = AlignModel.from_pretrained(checkpoint)

    # Pick the accelerator when one is present.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    hub_model.to(target_device)

    return hub_processor, hub_model


processor, model = load_resources()
81
+
82
+
83
def extract_text_with_easyocr(image, language="en"):
    """Run EasyOCR on ``image`` and return the recognised text as one string.

    Args:
        image: Anything ``np.array`` can convert; it is handed to the
            module-level ``reader``.
        language: Accepted for the call sites that pass it, but not read in
            this function - the languages are fixed when ``reader`` is built.

    Returns:
        The detected fragments joined with single spaces, or ``""`` when
        nothing was detected or OCR raised (the error is shown via
        ``st.error``).
    """
    try:
        # detail=0 asks EasyOCR for the text strings only.
        fragments = reader.readtext(np.array(image), detail=0)
        text = " ".join(fragments) if fragments else ""
    except Exception as e:
        st.error(f"Error during OCR: {e}")
        return ""
    return text
91
+
92
+ # --------------------
93
+ # Embedding Functions
94
+ # --------------------
95
def get_image_embedding(image):
    """Return the L2-normalised image embedding as a NumPy array.

    Args:
        image: Image handed unchanged to ``processor(images=...)``.

    Returns:
        ``numpy.ndarray`` holding ``model.get_image_features`` normalised
        along ``dim=1``.
    """
    # load_resources() may move the model to CUDA, so the processor output
    # has to follow it; otherwise the forward pass fails on a device mismatch.
    device = next(model.parameters()).device
    image_inputs = processor(images=image, return_tensors="pt")
    image_inputs = {
        name: value.to(device) if hasattr(value, "to") else value
        for name, value in image_inputs.items()
    }
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        image_outputs = model.get_image_features(**image_inputs)
    return F.normalize(image_outputs, dim=1).detach().cpu().numpy()
100
+
101
def get_text_embedding(text):
    """Return the L2-normalised text embedding as a NumPy array.

    Args:
        text: A single string; it is wrapped in a one-element list for the
            processor, with padding and truncation enabled.

    Returns:
        ``numpy.ndarray`` holding ``model.get_text_features`` normalised
        along ``dim=1``.
    """
    # load_resources() may move the model to CUDA, so the processor output
    # has to follow it; otherwise the forward pass fails on a device mismatch.
    device = next(model.parameters()).device
    text_inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
    text_inputs = {
        name: value.to(device) if hasattr(value, "to") else value
        for name, value in text_inputs.items()
    }
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        text_outputs = model.get_text_features(**text_inputs)
    return F.normalize(text_outputs, dim=1).detach().cpu().numpy()
106
+
107
+ # --------------------
108
+ # Search Function
109
+ # --------------------
110
def find_most_similar_products(
    image=None,
    description=None,
    n=3,
    combine_method="none"  # "none" (image-only), "text-only", or "average" for combining
):
    """Return the top-``n`` catalogue entries most similar to the query.

    Args:
        image: Query image; required for ``"none"`` and for the combined mode.
        description: Query text; required for ``"text-only"`` and for the
            combined mode.
        n: Maximum number of matches to return.
        combine_method: ``"none"`` uses the image embedding only,
            ``"text-only"`` uses the text embedding only, and any other value
            (``"average"`` by convention) averages both embeddings.

    Returns:
        A list of ``(score, entry)`` tuples sorted by descending score, where
        ``entry`` is a value of ``embeddings_metadata``. Entries whose mean
        similarity is not strictly positive, or that have no readable stored
        embedding, are left out.

    Raises:
        ValueError: If an input required by ``combine_method`` is missing.
    """
    # Validate up front: without these checks a missing input falls through
    # to the "average" branch and fails deep inside the processor.
    if combine_method == "none":
        if image is None:
            raise ValueError("combine_method='none' requires an image.")
        query_embed = get_image_embedding(image)  # image-only
    elif combine_method == "text-only":
        if description is None:
            raise ValueError("combine_method='text-only' requires a description.")
        query_embed = get_text_embedding(description)  # text-only
    else:
        # "average" => must have both image & description
        if image is None or description is None:
            raise ValueError(
                "Combining embeddings requires both an image and a description."
            )
        img_emb = get_image_embedding(image)
        txt_emb = get_text_embedding(description)
        query_embed = (img_emb + txt_emb) / 2.0  # simple average

    similarities = []

    # Score every product by the mean cosine similarity over its stored
    # image embeddings.
    for entry in embeddings_metadata.values():
        image_similarities = []
        for emb_path in entry.get("image_embedding_paths", []):
            emb_path = os.path.normpath(emb_path)
            if not os.path.exists(emb_path):
                continue
            # cosine_similarity needs 2-D input; atleast_2d is a no-op for
            # arrays that are already 2-D and lifts a flat vector to one row.
            stored_embedding = np.atleast_2d(np.load(emb_path))
            image_similarities.append(
                cosine_similarity(query_embed, stored_embedding).mean()
            )

        if not image_similarities:
            continue

        overall_score = np.mean(image_similarities)
        if overall_score > 0:
            similarities.append((overall_score, entry))

    # Sort descending by similarity
    return sorted(similarities, key=lambda pair: pair[0], reverse=True)[:n]
153
+
154
+ # --------------------
155
+ # Session State Setup
156
+ # --------------------
157
# "pdf_crops" holds (snippet_image, product_image) pairs saved from the PDF
# workflow; "results" holds the search results produced for those crops.
# Both start out as empty lists the first time the session runs.
for _state_key in ("pdf_crops", "results"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []
163
+
164
+ # --------------------
165
+ # APP UI
166
+ # --------------------
167
# The page title and the "Choose Search Method" selector are rendered once,
# further down, immediately before the per-method UI. Rendering them here as
# well would create a second identical st.selectbox with no key, which
# Streamlit rejects with DuplicateWidgetID, and would show the title twice.
173
+
174
+ # -----------------------------------------------------------------------------
175
+ # 1) PDF METHOD
176
+ # -----------------------------------------------------------------------------
177
+ # -----------------------------------------------------------------------------
178
+ # 1) PDF METHOD
179
+ # -----------------------------------------------------------------------------
180
+
181
+
182
# Initialize EasyOCR reader (Supports multiple languages)
@st.cache_resource()
def _load_ocr_reader():
    """Build the EasyOCR reader for English and Arabic.

    Cached because Streamlit re-executes this script on every interaction
    and constructing the reader is too expensive to repeat each time.
    """
    return easyocr.Reader(["en", "ar"])  # Add languages as needed


reader = _load_ocr_reader()

# Page configuration, the black-background CSS and the top-left logo are
# already emitted at the top of this script. They are intentionally not
# repeated here: st.set_page_config may only be called once per page and must
# be the first Streamlit command, and repeating the markup would duplicate
# the logo element.
229
+
230
+ # --------------------
231
+ # Load Processor, Model, and Metadata
232
+ # --------------------
233
def _resource_path(filename):
    """Locate a pickled resource file.

    The original data directory is preferred so existing setups keep working;
    otherwise the file is looked up next to this script.
    """
    legacy = os.path.join(r"E:\Mobica\pdf_parser\Data Sheet", filename)
    if os.path.exists(legacy):
        return legacy
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


def _load_pickle(filename):
    """Unpickle one resource file and return the stored object."""
    # SECURITY: pickle.load can execute arbitrary code. These files must come
    # from a trusted source (the application's own build artefacts) only.
    with open(_resource_path(filename), "rb") as f:
        return pickle.load(f)


@st.cache_resource()
def load_resources():
    """Load the pickled processor, model and embeddings metadata.

    Returns:
        A ``(processor, model, embeddings_metadata)`` tuple, each element
        unpickled from its own file.

    Raises:
        FileNotFoundError: If a resource file is found neither in the
            original data directory nor next to this script.
    """
    processor = _load_pickle("align_processor.pkl")
    model = _load_pickle("align_model.pkl")
    embeddings_metadata = _load_pickle("embeddings_metadata.pkl")
    return processor, model, embeddings_metadata


processor, model, embeddings_metadata = load_resources()
244
+
245
+ # --------------------
246
+ # OCR Function using EasyOCR
247
+ # --------------------
248
def extract_text_with_easyocr(image, language="en"):
    """Extract text from an image with the shared EasyOCR ``reader``.

    ``language`` is part of the signature but is not consulted here; the
    reader's language list is fixed where ``reader`` is created.

    Returns the recognised pieces joined by spaces, or an empty string when
    nothing is found or when OCR fails (the failure is reported through
    ``st.error``).
    """
    try:
        # detail=0 -> plain strings instead of (box, text, confidence) tuples.
        words = reader.readtext(np.array(image), detail=0)
        if words:
            return " ".join(words)
        return ""
    except Exception as e:
        st.error(f"Error during OCR: {e}")
        return ""
256
+
257
+ # --------------------
258
+ # APP UI
259
+ # --------------------
260
# convert_from_bytes renders the upload straight from memory, so no temporary
# file named after user input is written to (and left in) the working directory.
from pdf2image import convert_from_bytes

st.title("Mobica Find")

search_method = st.selectbox(
    "Choose Search Method",
    ["Upload PDF", "Image Only", "Description Only", "Both (Image + Description)"]
)

# Lowest baseline at which one more match (label + 100pt image) still fits on
# the PDF page; below this a new page is started.
_PDF_MIN_Y = 130
_PDF_TOP_Y = 750


def _first_image_path(entry):
    """Return the normalised first ``original_image_paths`` item, or None."""
    paths = entry.get("original_image_paths")
    return os.path.normpath(paths[0]) if paths else None


def _product_code(image_path):
    """Return the file-name prefix before the first underscore."""
    return os.path.basename(image_path).split('_')[0]


def _show_matched_image(image_path, sim_score):
    """Display a matched catalogue image, or a warning when it is unusable."""
    if not os.path.exists(image_path):
        st.warning(f"⚠️ Image file not found: {image_path}")
        return
    try:
        img_matched = Image.open(image_path).convert("RGB")
    except UnidentifiedImageError:
        st.warning(f"⚠️ Cannot open image: {image_path}. It might be corrupted.")
        return
    st.image(
        img_matched,
        caption=f"Matched Image (Sim: {sim_score:.4f})",
        use_column_width=True
    )


def _render_matches(results, show_product_code=False):
    """Render a list of ``(score, entry)`` matches in the Streamlit page."""
    for sim_score, matched_entry in results:
        image_path = _first_image_path(matched_entry)
        st.subheader(f"🔹 Match (Similarity: {sim_score:.4f})")
        if show_product_code and image_path is not None:
            st.write(f"**Product Code:** {_product_code(image_path)}")
        st.write(f"**Description:** {matched_entry.get('description', 'No description')}")
        if image_path is not None:
            _show_matched_image(image_path, sim_score)


def _render_search_results(results):
    """Render matches for the single-query modes, or a 'nothing found' note."""
    if results:
        _render_matches(results)
    else:
        st.warning("No similar products found.")


def _build_results_pdf(crops, results):
    """Build the downloadable report and return it as a rewound BytesIO.

    Each saved crop starts a new page with the crop image followed by the
    product code and picture of every match. ``results[i]`` is expected to
    belong to ``crops[i]``; crops without a result list get an empty section.
    """
    pdf_buffer = io.BytesIO()
    pdf = canvas.Canvas(pdf_buffer, pagesize=letter)

    for i, (_snippet_img, product_img) in enumerate(crops):
        pdf.drawString(100, _PDF_TOP_Y, f"Crop {i+1}")

        # Add cropped product image to PDF
        if product_img is not None:
            img_byte_arr = io.BytesIO()
            product_img.save(img_byte_arr, format='JPEG')
            img_byte_arr.seek(0)
            pdf.drawImage(ImageReader(img_byte_arr), 100, 550, width=200, height=150)

        y_pos = 530
        matches = results[i] if i < len(results) else []
        for _sim_score, matched_entry in matches:
            matched_img_path = _first_image_path(matched_entry)
            if matched_img_path is None:
                continue
            # Continue on a fresh page instead of drawing below the bottom edge.
            if y_pos < _PDF_MIN_Y:
                pdf.showPage()
                y_pos = _PDF_TOP_Y
            pdf.drawString(100, y_pos, f"Product Code: {_product_code(matched_img_path)}")
            y_pos -= 40
            if os.path.exists(matched_img_path):
                pdf.drawImage(matched_img_path, 350, y_pos - 50, width=150, height=100)
            y_pos -= 120

        pdf.showPage()

    pdf.save()
    pdf_buffer.seek(0)
    return pdf_buffer


# -----------------------------------------------------------------------------
# 1) PDF METHOD
# -----------------------------------------------------------------------------
if search_method == "Upload PDF":
    st.subheader("Upload a PDF")
    uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])

    if uploaded_pdf:
        st.write("Extracting pages from PDF...")
        pages = convert_from_bytes(uploaded_pdf.getvalue(), dpi=300)

        if pages:
            page_num = st.number_input(
                "Select Page Number", min_value=1, max_value=len(pages), value=1
            ) - 1
            page_image = pages[page_num]

            # -------------------- Crop Snippet for OCR (description) --------------------
            st.subheader("Crop Snippet from PDF for OCR")
            cropped_img_pdf_snippet = st_cropper(page_image, realtime_update=True, box_color='#FF0000')

            description_ocr = ""
            if cropped_img_pdf_snippet:
                cropped_img_pdf_snippet = cropped_img_pdf_snippet.convert("RGB")
                st.image(cropped_img_pdf_snippet, caption="Cropped PDF Snippet (For OCR)")

                selected_lang = st.selectbox("Select OCR Language", ["en", "ar", "en+ar"], index=0)
                description_ocr = extract_text_with_easyocr(cropped_img_pdf_snippet, language=selected_lang)

                if description_ocr:
                    st.success("OCR text extracted successfully!")
                    st.write("**Detected Text**:", description_ocr)
                else:
                    st.warning("No text detected.")

            # -------------------- Crop for product image --------------------
            st.subheader("Crop the Product Image")
            furniture_cropped_img = st_cropper(page_image, realtime_update=True, box_color='#00FF00')

            if furniture_cropped_img:
                furniture_cropped_img = furniture_cropped_img.convert("RGB")
                st.image(furniture_cropped_img, caption="Cropped Product Image")

            # -------------------- "Done" Button to save both crops --------------------
            if st.button("Done"):
                st.session_state.setdefault("pdf_crops", []).append(
                    (cropped_img_pdf_snippet, furniture_cropped_img)
                )
                st.success(f"Crop #{len(st.session_state['pdf_crops'])} saved!")

            # -------------------- Show saved crops if any --------------------
            saved_crops = st.session_state.get("pdf_crops", [])
            if saved_crops:
                st.subheader("📊 View Saved Crops")

                # A slider needs min < max, so it is only shown once there is
                # more than one crop to choose from.
                if len(saved_crops) > 1:
                    crop_index = st.slider("Select Crop", 1, len(saved_crops), 1) - 1
                else:
                    crop_index = 0
                snippet_img, product_img = saved_crops[crop_index]

                col1, col2 = st.columns(2)
                with col1:
                    if snippet_img:
                        st.image(snippet_img, caption=f"Snippet Crop {crop_index+1}", use_column_width=True)
                with col2:
                    if product_img:
                        st.image(product_img, caption=f"Product Crop {crop_index+1}", use_column_width=True)

                if st.button(f"Delete Crop {crop_index+1}"):
                    saved_crops.pop(crop_index)
                    # Drop the matching result list too so results[i] keeps
                    # describing pdf_crops[i].
                    stored_results = st.session_state.setdefault("results", [])
                    if crop_index < len(stored_results):
                        stored_results.pop(crop_index)
                    st.success(f"Crop {crop_index+1} deleted!")
                    # st.rerun supersedes the deprecated st.experimental_rerun;
                    # fall back for Streamlit builds that predate it.
                    (getattr(st, "rerun", None) or st.experimental_rerun)()

            # -------------------- Let user choose how many similar products --------------------
            n_similar = st.slider("How many similar products do you want?", 1, 10, 3)

            # -------------------- "Find Similar Products" button --------------------
            if st.button("Find Similar Products"):
                crops = st.session_state.setdefault("pdf_crops", [])
                # Image-based search on the product crop only. One result
                # list per crop (empty when the crop has no product image)
                # keeps the results aligned with the crops for the PDF export.
                st.session_state["results"] = [
                    find_most_similar_products(
                        image=product_img,
                        n=n_similar,
                        combine_method="none"  # image-only
                    ) if product_img is not None else []
                    for _snippet_img, product_img in crops
                ]
                if crops:
                    st.success("Results generated!")
                else:
                    st.warning("Save at least one crop before searching.")

            # -------------- Display results in the Streamlit GUI --------------
            # Read from session state so the results survive the reruns
            # triggered by other widgets.
            for i, results_for_img in enumerate(st.session_state.get("results", [])):
                st.write(f"**Results for Crop {i+1}**:")
                if results_for_img:
                    _render_matches(results_for_img, show_product_code=True)
                else:
                    st.warning(f"No similar products found for Crop {i+1}.")

            # -------------------- Generate PDF if results are available --------------------
            if st.session_state.get("results"):
                st.download_button(
                    "📥 Download Results PDF",
                    _build_results_pdf(
                        st.session_state.get("pdf_crops", []),
                        st.session_state["results"],
                    ),
                    f"{uploaded_pdf.name}_results.pdf",
                    "application/pdf"
                )

# -----------------------------------------------------------------------------
# 2) IMAGE ONLY
# -----------------------------------------------------------------------------
elif search_method == "Image Only":
    st.subheader("Upload an Image")
    uploaded_image = st.file_uploader("Select an Image", type=["png", "jpg", "jpeg"])

    if uploaded_image is not None:
        image_obj = Image.open(uploaded_image).convert("RGB")
        st.image(image_obj, use_column_width=True)

        # Let user choose how many similar products
        n_similar = st.slider("How many similar products do you want?", 1, 10, 3)

        # Button to trigger the search
        if st.button("Find Similar Products"):
            _render_search_results(
                find_most_similar_products(
                    image=image_obj,
                    n=n_similar,
                    combine_method="none"  # image-only
                )
            )

# -----------------------------------------------------------------------------
# 3) DESCRIPTION ONLY
# -----------------------------------------------------------------------------
elif search_method == "Description Only":
    st.subheader("Enter a Description")
    user_description = st.text_area("Type or paste your description here")

    if user_description.strip():
        # Let user choose how many similar products
        n_similar = st.slider("How many similar products do you want?", 1, 10, 3)

        # Button to trigger the search
        if st.button("Find Similar Products"):
            _render_search_results(
                find_most_similar_products(
                    description=user_description,
                    n=n_similar,
                    combine_method="text-only"
                )
            )

# -----------------------------------------------------------------------------
# 4) BOTH (IMAGE + DESCRIPTION)
# -----------------------------------------------------------------------------
elif search_method == "Both (Image + Description)":
    st.subheader("Upload an Image and Enter a Description")
    uploaded_image = st.file_uploader("Select an Image", type=["png", "jpg", "jpeg"])
    user_description = st.text_area("Type or paste your description here")

    image_obj = None
    if uploaded_image is not None:
        image_obj = Image.open(uploaded_image).convert("RGB")
        st.image(image_obj, use_column_width=True)

    # The combined search needs both inputs, so the controls only appear once
    # an image is uploaded and a non-blank description is entered.
    if image_obj is not None and user_description.strip():
        # Let user choose how many similar products
        n_similar = st.slider("How many similar products do you want?", 1, 10, 3)

        # Button to trigger the search
        if st.button("Find Similar Products"):
            _render_search_results(
                find_most_similar_products(
                    image=image_obj,
                    description=user_description,
                    n=n_similar,
                    combine_method="average"
                )
            )
embeddings_metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f96cefa7b214660cef0e4ee06c3685141b17dd920944d7f8d724e65761d54a
3
+ size 209465
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ tesseract-ocr
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.31.1
2
+ numpy==1.26.4
3
+ torch==2.6.0+cpu
4
+ Pillow==10.2.0
5
+ scikit-learn==1.4.0
6
+ pdf2image==1.17.0
7
+ streamlit_cropper==0.2.1
8
+ pytesseract==0.3.10
9
+ reportlab==4.3.1