pentarosarium committed on
Commit
db395ea
·
1 Parent(s): d55667a

progress more %%%%

Browse files
Files changed (1) hide show
  1. app.py +0 -23
app.py CHANGED
@@ -45,14 +45,6 @@ def translate(text):
45
  input_length = inputs.input_ids.shape[1]
46
  max_length = min(512, int(input_length * 1.5))
47
 
48
- # Estimate total translation time (adjust this based on your observations)
49
- estimated_time = input_length * 0.1 # 0.1 seconds per input token, adjust as needed
50
-
51
- # Set up the progress bar
52
- pbar = tqdm(total=100, desc="Translating", unit="%")
53
-
54
- start_time = time.time()
55
-
56
  # Generate translation
57
  translated_tokens = translation_model.generate(
58
  **inputs,
@@ -62,19 +54,6 @@ def translate(text):
62
  early_stopping=True
63
  )
64
 
65
- # Update progress bar based on elapsed time
66
- while time.time() - start_time < estimated_time:
67
- elapsed = time.time() - start_time
68
- progress = min(int((elapsed / estimated_time) * 100), 99)
69
- pbar.n = progress
70
- pbar.refresh()
71
- time.sleep(0.1)
72
-
73
- # Ensure the progress bar reaches 100%
74
- pbar.n = 100
75
- pbar.refresh()
76
- pbar.close()
77
-
78
  # Decode the translated tokens
79
  translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
80
  return translated_text
@@ -129,8 +108,6 @@ def fuzzy_deduplicate(df, column, threshold=65):
129
  def process_file(uploaded_file):
130
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
131
 
132
-
133
-
134
  # Apply fuzzy deduplication
135
  df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)
136
 
 
45
  input_length = inputs.input_ids.shape[1]
46
  max_length = min(512, int(input_length * 1.5))
47
 
 
 
 
 
 
 
 
 
48
  # Generate translation
49
  translated_tokens = translation_model.generate(
50
  **inputs,
 
54
  early_stopping=True
55
  )
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Decode the translated tokens
58
  translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
59
  return translated_text
 
108
  def process_file(uploaded_file):
109
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
110
 
 
 
111
  # Apply fuzzy deduplication
112
  df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)
113