Spaces:
Running
Running
Commit
·
d55667a
1
Parent(s):
2e78fc6
progress more %
Browse files
app.py
CHANGED
@@ -42,30 +42,37 @@ def translate(text):
|
|
42 |
inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
|
43 |
|
44 |
# Calculate max_length based on input length (you may need to adjust this ratio)
|
45 |
-
|
|
|
46 |
|
47 |
-
#
|
48 |
-
|
49 |
|
50 |
# Set up the progress bar
|
51 |
-
pbar = tqdm(total=
|
52 |
|
53 |
-
|
54 |
-
def update_progress_bar(beam_idx, token_idx, token):
|
55 |
-
pbar.update(1)
|
56 |
|
57 |
-
# Generate translation
|
58 |
translated_tokens = translation_model.generate(
|
59 |
**inputs,
|
60 |
max_length=max_length,
|
61 |
num_beams=5,
|
62 |
no_repeat_ngram_size=2,
|
63 |
-
early_stopping=True
|
64 |
-
callback=update_progress_bar,
|
65 |
-
callback_steps=1
|
66 |
)
|
67 |
|
68 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
pbar.close()
|
70 |
|
71 |
# Decode the translated tokens
|
@@ -125,7 +132,8 @@ def process_file(uploaded_file):
|
|
125 |
|
126 |
|
127 |
# Apply fuzzy deduplication
|
128 |
-
df = df.groupby('Объект'
|
|
|
129 |
|
130 |
# Translate texts
|
131 |
translated_texts = []
|
@@ -154,7 +162,7 @@ def process_file(uploaded_file):
|
|
154 |
return df
|
155 |
|
156 |
def main():
|
157 |
-
st.title("... приступим к анализу...")
|
158 |
|
159 |
uploaded_file = st.file_uploader("ВЫБИРАЙТЕ EXCEL-файл", type="xlsx")
|
160 |
|
|
|
42 |
inputs = translation_tokenizer(text, return_tensors="pt", truncation=True)
|
43 |
|
44 |
# Calculate max_length based on input length (you may need to adjust this ratio)
|
45 |
+
input_length = inputs.input_ids.shape[1]
|
46 |
+
max_length = min(512, int(input_length * 1.5))
|
47 |
|
48 |
+
# Estimate total translation time (adjust this based on your observations)
|
49 |
+
estimated_time = input_length * 0.1 # 0.1 seconds per input token, adjust as needed
|
50 |
|
51 |
# Set up the progress bar
|
52 |
+
pbar = tqdm(total=100, desc="Translating", unit="%")
|
53 |
|
54 |
+
start_time = time.time()
|
|
|
|
|
55 |
|
56 |
+
# Generate translation
|
57 |
translated_tokens = translation_model.generate(
|
58 |
**inputs,
|
59 |
max_length=max_length,
|
60 |
num_beams=5,
|
61 |
no_repeat_ngram_size=2,
|
62 |
+
early_stopping=True
|
|
|
|
|
63 |
)
|
64 |
|
65 |
+
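# Update progress bar based on elapsed time (note: generate() above has already returned by this point, so this loop only waits out whatever remains of estimated_time)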
|
66 |
+
while time.time() - start_time < estimated_time:
|
67 |
+
elapsed = time.time() - start_time
|
68 |
+
progress = min(int((elapsed / estimated_time) * 100), 99)
|
69 |
+
pbar.n = progress
|
70 |
+
pbar.refresh()
|
71 |
+
time.sleep(0.1)
|
72 |
+
|
73 |
+
# Ensure the progress bar reaches 100%
|
74 |
+
pbar.n = 100
|
75 |
+
pbar.refresh()
|
76 |
pbar.close()
|
77 |
|
78 |
# Decode the translated tokens
|
|
|
132 |
|
133 |
|
134 |
# Apply fuzzy deduplication
|
135 |
+
df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)
|
136 |
+
|
137 |
|
138 |
# Translate texts
|
139 |
translated_texts = []
|
|
|
162 |
return df
|
163 |
|
164 |
def main():
|
165 |
+
st.title("... приступим к анализу... версия 15")
|
166 |
|
167 |
uploaded_file = st.file_uploader("ВЫБИРАЙТЕ EXCEL-файл", type="xlsx")
|
168 |
|