Commit db395ea · progress more %%%%
Parent(s): d55667a
app.py CHANGED
@@ -45,14 +45,6 @@ def translate(text):
     input_length = inputs.input_ids.shape[1]
     max_length = min(512, int(input_length * 1.5))

-    # Estimate total translation time (adjust this based on your observations)
-    estimated_time = input_length * 0.1  # 0.1 seconds per input token, adjust as needed
-
-    # Set up the progress bar
-    pbar = tqdm(total=100, desc="Translating", unit="%")
-
-    start_time = time.time()
-
     # Generate translation
     translated_tokens = translation_model.generate(
         **inputs,
@@ -62,19 +54,6 @@ def translate(text):
         early_stopping=True
     )

-    # Update progress bar based on elapsed time
-    while time.time() - start_time < estimated_time:
-        elapsed = time.time() - start_time
-        progress = min(int((elapsed / estimated_time) * 100), 99)
-        pbar.n = progress
-        pbar.refresh()
-        time.sleep(0.1)
-
-    # Ensure the progress bar reaches 100%
-    pbar.n = 100
-    pbar.refresh()
-    pbar.close()
-
     # Decode the translated tokens
     translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return translated_text
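Note on the removal: generate() blocks until the full translation is done, so the deleted timing loop could only start after generation had already returned. The bar therefore either jumped straight to 100% (when generation overran the estimate) or stalled the function just to pad out the estimate (when generation finished early); dropping it changes no translation output. A minimal standalone reproduction of the removed pattern, with a hypothetical simulate_generate standing in for the blocking translation_model.generate call:

# Standalone sketch of the removed progress pattern. simulate_generate is
# a hypothetical stand-in for the blocking translation_model.generate call.
import time
from tqdm import tqdm

def simulate_generate(duration=2.0):
    time.sleep(duration)  # generate() blocks until the translation is done

estimated_time = 3.0  # the old code derived this as input_length * 0.1
pbar = tqdm(total=100, desc="Translating", unit="%")
start_time = time.time()

simulate_generate()  # no bar updates can happen while this blocks

# By the time this loop runs, generation has already finished: it either
# exits at once (generation overran the estimate) or sleeps to pad it out.
while time.time() - start_time < estimated_time:
    elapsed = time.time() - start_time
    pbar.n = min(int((elapsed / estimated_time) * 100), 99)
    pbar.refresh()
    time.sleep(0.1)

pbar.n = 100
pbar.refresh()
pbar.close()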
@@ -129,8 +108,6 @@ def fuzzy_deduplicate(df, column, threshold=65):
 def process_file(uploaded_file):
     df = pd.read_excel(uploaded_file, sheet_name='Публикации')

-
-
     # Apply fuzzy deduplication
     df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65), include_groups=False).reset_index(drop=True)

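For readers tracing the call above: process_file reads the 'Публикации' ("Publications") sheet, groups rows by 'Объект' ("Object"), and deduplicates near-identical 'Выдержки из текста' ("Text excerpts") within each group. The body of fuzzy_deduplicate is not part of this diff; one plausible sketch that matches its signature, assuming a rapidfuzz-style similarity score (library and loop are assumptions, not this Space's actual code):

# Hypothetical implementation matching the fuzzy_deduplicate(df, column,
# threshold=65) signature seen above; assumes rapidfuzz, which may differ
# from the Space's actual code.
import pandas as pd
from rapidfuzz import fuzz

def fuzzy_deduplicate(df, column, threshold=65):
    kept_rows, kept_texts = [], []
    for _, row in df.iterrows():
        text = str(row[column])
        # Keep the row only if it is less than `threshold`% similar to
        # every excerpt already kept for this group.
        if all(fuzz.ratio(text, seen) < threshold for seen in kept_texts):
            kept_rows.append(row)
            kept_texts.append(text)
    return pd.DataFrame(kept_rows)

With include_groups=False (added in pandas 2.2), the grouping column 'Объект' is excluded from the frame each group receives, so the deduplication sees only the remaining columns.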