pentarosarium committed on
Commit
d55667a
·
1 Parent(s): 2e78fc6

progress more %

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -42,30 +42,37 @@ def translate(text):
42
  inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
43
 
44
  # Calculate max_length based on input length (you may need to adjust this ratio)
45
- max_length = min(512, int(inputs.input_ids.shape[1] * 1.5))
 
46
 
47
- # Calculate max_new_tokens
48
- max_new_tokens = max_length - inputs.input_ids.shape[1]
49
 
50
  # Set up the progress bar
51
- pbar = tqdm(total=max_new_tokens, desc="Translating", unit="token")
52
 
53
- # Custom callback to update the progress bar
54
- def update_progress_bar(beam_idx, token_idx, token):
55
- pbar.update(1)
56
 
57
- # Generate translation with progress updates
58
  translated_tokens = translation_model.generate(
59
  **inputs,
60
  max_length=max_length,
61
  num_beams=5,
62
  no_repeat_ngram_size=2,
63
- early_stopping=True,
64
- callback=update_progress_bar,
65
- callback_steps=1
66
  )
67
 
68
- # Close the progress bar
 
 
 
 
 
 
 
 
 
 
69
  pbar.close()
70
 
71
  # Decode the translated tokens
@@ -125,7 +132,8 @@ def process_file(uploaded_file):
125
 
126
 
127
  # Apply fuzzy deduplication
128
- df = df.groupby('Объект', group_keys=False).apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)).reset_index(drop=True)
 
129
 
130
  # Translate texts
131
  translated_texts = []
@@ -154,7 +162,7 @@ def process_file(uploaded_file):
154
  return df
155
 
156
  def main():
157
- st.title("... приступим к анализу...")
158
 
159
  uploaded_file = st.file_uploader("ВЫБИРАЙТЕ EXCEL-файл", type="xlsx")
160
 
 
42
  inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
43
 
44
  # Calculate max_length based on input length (you may need to adjust this ratio)
45
+ input_length = inputs.input_ids.shape[1]
46
+ max_length = min(512, int(input_length * 1.5))
47
 
48
+ # Estimate total translation time (adjust this based on your observations)
49
+ estimated_time = input_length * 0.1 # 0.1 seconds per input token, adjust as needed
50
 
51
  # Set up the progress bar
52
+ pbar = tqdm(total=100, desc="Translating", unit="%")
53
 
54
+ start_time = time.time()
 
 
55
 
56
+ # Generate translation
57
  translated_tokens = translation_model.generate(
58
  **inputs,
59
  max_length=max_length,
60
  num_beams=5,
61
  no_repeat_ngram_size=2,
62
+ early_stopping=True
 
 
63
  )
64
 
65
+ # Update progress bar based on elapsed time
66
+ while time.time() - start_time < estimated_time:
67
+ elapsed = time.time() - start_time
68
+ progress = min(int((elapsed / estimated_time) * 100), 99)
69
+ pbar.n = progress
70
+ pbar.refresh()
71
+ time.sleep(0.1)
72
+
73
+ # Ensure the progress bar reaches 100%
74
+ pbar.n = 100
75
+ pbar.refresh()
76
  pbar.close()
77
 
78
  # Decode the translated tokens
 
132
 
133
 
134
  # Apply fuzzy deduplication
135
+ df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)
136
+
137
 
138
  # Translate texts
139
  translated_texts = []
 
162
  return df
163
 
164
  def main():
165
+ st.title("... приступим к анализу... версия 15")
166
 
167
  uploaded_file = st.file_uploader("ВЫБИРАЙТЕ EXCEL-файл", type="xlsx")
168