Commit db395ea · progress more %%%%
Parent(s): d55667a
app.py CHANGED
@@ -45,14 +45,6 @@ def translate(text):
     input_length = inputs.input_ids.shape[1]
     max_length = min(512, int(input_length * 1.5))

-    # Estimate total translation time (adjust this based on your observations)
-    estimated_time = input_length * 0.1  # 0.1 seconds per input token, adjust as needed
-
-    # Set up the progress bar
-    pbar = tqdm(total=100, desc="Translating", unit="%")
-
-    start_time = time.time()
-
     # Generate translation
     translated_tokens = translation_model.generate(
         **inputs,
@@ -62,19 +54,6 @@ def translate(text):
         early_stopping=True
     )

-    # Update progress bar based on elapsed time
-    while time.time() - start_time < estimated_time:
-        elapsed = time.time() - start_time
-        progress = min(int((elapsed / estimated_time) * 100), 99)
-        pbar.n = progress
-        pbar.refresh()
-        time.sleep(0.1)
-
-    # Ensure the progress bar reaches 100%
-    pbar.n = 100
-    pbar.refresh()
-    pbar.close()
-
     # Decode the translated tokens
     translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return translated_text
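Note on the removal: generate() blocks until the full translation is done, so the deleted timing loop could only start after generation had already returned. The bar therefore either jumped straight to 100% (when generation overran the estimate) or stalled the function just to pad out the estimate (when generation finished early); dropping it changes no translation output. A minimal standalone reproduction of the removed pattern, with a hypothetical simulate_generate standing in for the blocking translation_model.generate call:

# Standalone sketch of the removed progress pattern. simulate_generate is
# a hypothetical stand-in for the blocking translation_model.generate call.
import time
from tqdm import tqdm

def simulate_generate(duration=2.0):
    time.sleep(duration)  # generate() blocks until the translation is done

estimated_time = 3.0  # the old code derived this as input_length * 0.1
pbar = tqdm(total=100, desc="Translating", unit="%")
start_time = time.time()

simulate_generate()  # no bar updates can happen while this blocks

# By the time this loop runs, generation has already finished: it either
# exits at once (generation overran the estimate) or sleeps to pad it out.
while time.time() - start_time < estimated_time:
    elapsed = time.time() - start_time
    pbar.n = min(int((elapsed / estimated_time) * 100), 99)
    pbar.refresh()
    time.sleep(0.1)

pbar.n = 100
pbar.refresh()
pbar.close()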
@@ -129,8 +108,6 @@ def fuzzy_deduplicate(df, column, threshold=65):
 def process_file(uploaded_file):
     df = pd.read_excel(uploaded_file, sheet_name='Публикации')

-
-
     # Apply fuzzy deduplication
     df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)

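For readers tracing the call above: process_file reads the 'Публикации' ("Publications") sheet, groups rows by 'Объект' ("Object"), and deduplicates near-identical 'Выдержки из текста' ("Text excerpts") within each group. The body of fuzzy_deduplicate is not part of this diff; one plausible sketch that matches its signature, assuming a rapidfuzz-style similarity score (library and loop are assumptions, not this Space's actual code):

# Hypothetical implementation matching the fuzzy_deduplicate(df, column,
# threshold=65) signature seen above; assumes rapidfuzz, which may differ
# from the Space's actual code.
import pandas as pd
from rapidfuzz import fuzz

def fuzzy_deduplicate(df, column, threshold=65):
    kept_rows, kept_texts = [], []
    for _, row in df.iterrows():
        text = str(row[column])
        # Keep the row only if it is less than `threshold`% similar to
        # every excerpt already kept for this group.
        if all(fuzz.ratio(text, seen) < threshold for seen in kept_texts):
            kept_rows.append(row)
            kept_texts.append(text)
    return pd.DataFrame(kept_rows)

With include_groups=False (added in pandas 2.2), the grouping column 'Объект' is excluded from the frame each group receives, so the deduplication sees only the remaining columns.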