Spaces:

minishlab
/

semantic-deduplication

Running

App Files Files Community

Pringled committed on Oct 12, 2024

Commit

75ff340

1 Parent(s): 1d331c4

Updates

Browse files

Files changed (1) hide show

app.py +12 -2

app.py CHANGED Viewed

@@ -20,24 +20,33 @@ default_threshold = 0.9
 ds_default1 = load_dataset(default_dataset1_name, split=default_dataset1_split)
 ds_default2 = load_dataset(default_dataset2_name, split=default_dataset2_split)
 from tqdm import tqdm as original_tqdm
 # Patch tqdm to use Gradio's progress bar
 def patch_tqdm_for_gradio(progress):
     class GradioTqdm(original_tqdm):
         def __init__(self, *args, **kwargs):
             super().__init__(*args, **kwargs)
             self.progress = progress
             self.total_batches = kwargs.get('total', len(args[0])) if len(args) > 0 else 1
         def update(self, n=1):
             super().update(n)
-            self.progress(self.n / self.total_batches)
     return GradioTqdm
 # Function to patch the original encode function with our Gradio tqdm
 def original_encode_with_tqdm(original_encode_func, patched_tqdm):
     def new_encode(*args, **kwargs):
-        # Replace tqdm with our patched version
         original_tqdm_backup = original_tqdm
         try:
             # Patch the `tqdm` within encode
@@ -49,6 +58,7 @@ def original_encode_with_tqdm(original_encode_func, patched_tqdm):
     return new_encode
 def batch_iterable(iterable, batch_size):
     """Helper function to create batches from an iterable."""
     for i in range(0, len(iterable), batch_size):

 ds_default1 = load_dataset(default_dataset1_name, split=default_dataset1_split)
 ds_default2 = load_dataset(default_dataset2_name, split=default_dataset2_split)
+# Patch tqdm to use Gradio's progress bar
 from tqdm import tqdm as original_tqdm
 # Patch tqdm to use Gradio's progress bar
 def patch_tqdm_for_gradio(progress):
     class GradioTqdm(original_tqdm):
         def __init__(self, *args, **kwargs):
             super().__init__(*args, **kwargs)
             self.progress = progress
+            # Set smaller step sizes or update more frequently based on total items
             self.total_batches = kwargs.get('total', len(args[0])) if len(args) > 0 else 1
+            self.update_interval = max(1, self.total_batches // 100)  # Update every 1% of progress
         def update(self, n=1):
             super().update(n)
+            # Only update Gradio's progress every `update_interval` steps
+            if self.n % self.update_interval == 0 or self.n == self.total_batches:
+                self.progress(self.n / self.total_batches)
     return GradioTqdm
 # Function to patch the original encode function with our Gradio tqdm
 def original_encode_with_tqdm(original_encode_func, patched_tqdm):
     def new_encode(*args, **kwargs):
         original_tqdm_backup = original_tqdm
         try:
             # Patch the `tqdm` within encode
     return new_encode
 def batch_iterable(iterable, batch_size):
     """Helper function to create batches from an iterable."""
     for i in range(0, len(iterable), batch_size):