KuangDW committed on
Commit
1d63826
·
1 Parent(s): a6d920a

add chunk size function

Browse files
Files changed (1) hide show
  1. app.py +48 -50
app.py CHANGED
@@ -289,62 +289,60 @@ def process_text(text, src_language, target_language, max_iterations_value, thre
289
  source_segments = segment_sentences_by_punctuation(source_sentence, src_nlp)
290
 
291
  if chunk_size == -1:
292
- chunks = [' '.join(source_segments)]
293
- else:
294
- chunks = [' '.join(source_segments[i:i+chunk_size]) for i in range(0, len(source_segments), chunk_size)]
295
-
296
- org_translated_chunks = []
297
- p2a_translated_chunks = []
298
- bfn_translated_chunks = []
299
- mpc_translated_chunks = []
300
-
301
- for chunk in chunks:
302
  if "Original" in translation_methods:
303
- translation, _ = original_translation(chunk, src_language, target_language, session_id)
304
- org_translated_chunks.append(translation)
305
  if "Plan2Align" in translation_methods:
306
- translation, _ = plan2align_translate_text(
307
- chunk, session_id, model, tokenizer, device, src_language, target_language,
308
  max_iterations_value, threshold_value, good_ref_contexts_num_value, "metricx"
309
  )
310
- p2a_translated_chunks.append(translation)
311
  if "Best-of-N" in translation_methods:
312
- translation, _ = best_of_n_translation(chunk, src_language, target_language, max_iterations_value, session_id)
313
- bfn_translated_chunks.append(translation)
314
  if "MPC" in translation_methods:
315
- translation, _ = mpc_translation(chunk, src_language, target_language, max_iterations_value, session_id)
316
- mpc_translated_chunks.append(translation)
317
-
318
- org_combined_translation = ' '.join(org_translated_chunks)
319
- p2a_combined_translation = ' '.join(p2a_translated_chunks)
320
- bfn_combined_translation = ' '.join(bfn_translated_chunks)
321
- mpc_combined_translation = ' '.join(mpc_translated_chunks)
322
-
323
- orig, best_score = summary_translate(text, org_combined_translation, target_language, session_id)
324
- orig_output = f"{orig}\n\nScore: {best_score:.2f}"
325
- plan2align_trans, best_score = summary_translate(text, p2a_combined_translation, target_language, session_id)
326
- plan2align_output = f"{plan2align_trans}\n\nScore: {best_score:.2f}"
327
- best_candidate, best_score = summary_translate(text, bfn_combined_translation, target_language, session_id)
328
- best_of_n_output = f"{best_candidate}\n\nScore: {best_score:.2f}"
329
- mpc_candidate, best_score = summary_translate(text, mpc_combined_translation, target_language, session_id)
330
- mpc_output = f"{mpc_candidate}\n\nScore: {mpc_score:.2f}"
331
-
332
- # if "Original" in translation_methods:
333
- # orig, best_score = original_translation(text, src_language, target_language, session_id)
334
- # orig_output = f"{orig}\n\nScore: {best_score:.2f}"
335
- # if "Plan2Align" in translation_methods:
336
- # plan2align_trans, best_score = plan2align_translate_text(
337
- # text, session_id, model, tokenizer, device, src_language, target_language,
338
- # max_iterations_value, threshold_value, good_ref_contexts_num_value, "metricx"
339
- # )
340
- # plan2align_output = f"{plan2align_trans}\n\nScore: {best_score:.2f}"
341
- # if "Best-of-N" in translation_methods:
342
- # best_candidate, best_score = best_of_n_translation(text, src_language, target_language, max_iterations_value, session_id)
343
- # best_of_n_output = f"{best_candidate}\n\nScore: {best_score:.2f}"
344
- # if "MPC" in translation_methods:
345
- # mpc_candidate, mpc_score = mpc_translation(text, src_language, target_language,
346
- # max_iterations_value, session_id)
347
- # mpc_output = f"{mpc_candidate}\n\nScore: {mpc_score:.2f}"
 
 
 
 
 
 
 
 
348
 
349
  return orig_output, plan2align_output, best_of_n_output, mpc_output
350
 
 
289
  source_segments = segment_sentences_by_punctuation(source_sentence, src_nlp)
290
 
291
  if chunk_size == -1:
 
 
 
 
 
 
 
 
 
 
292
  if "Original" in translation_methods:
293
+ orig, best_score = original_translation(text, src_language, target_language, session_id)
294
+ orig_output = f"{orig}\n\nScore: {best_score:.2f}"
295
  if "Plan2Align" in translation_methods:
296
+ plan2align_trans, best_score = plan2align_translate_text(
297
+ text, session_id, model, tokenizer, device, src_language, target_language,
298
  max_iterations_value, threshold_value, good_ref_contexts_num_value, "metricx"
299
  )
300
+ plan2align_output = f"{plan2align_trans}\n\nScore: {best_score:.2f}"
301
  if "Best-of-N" in translation_methods:
302
+ best_candidate, best_score = best_of_n_translation(text, src_language, target_language, max_iterations_value, session_id)
303
+ best_of_n_output = f"{best_candidate}\n\nScore: {best_score:.2f}"
304
  if "MPC" in translation_methods:
305
+ mpc_candidate, mpc_score = mpc_translation(text, src_language, target_language,
306
+ max_iterations_value, session_id)
307
+ mpc_output = f"{mpc_candidate}\n\nScore: {mpc_score:.2f}"
308
+ else:
309
+ chunks = [' '.join(source_segments[i:i+chunk_size]) for i in range(0, len(source_segments), chunk_size)]
310
+
311
+ org_translated_chunks = []
312
+ p2a_translated_chunks = []
313
+ bfn_translated_chunks = []
314
+ mpc_translated_chunks = []
315
+
316
+ for chunk in chunks:
317
+ if "Original" in translation_methods:
318
+ translation, _ = original_translation(chunk, src_language, target_language, session_id)
319
+ org_translated_chunks.append(translation)
320
+ if "Plan2Align" in translation_methods:
321
+ translation, _ = plan2align_translate_text(
322
+ chunk, session_id, model, tokenizer, device, src_language, target_language,
323
+ max_iterations_value, threshold_value, good_ref_contexts_num_value, "metricx"
324
+ )
325
+ p2a_translated_chunks.append(translation)
326
+ if "Best-of-N" in translation_methods:
327
+ translation, _ = best_of_n_translation(chunk, src_language, target_language, max_iterations_value, session_id)
328
+ bfn_translated_chunks.append(translation)
329
+ if "MPC" in translation_methods:
330
+ translation, _ = mpc_translation(chunk, src_language, target_language, max_iterations_value, session_id)
331
+ mpc_translated_chunks.append(translation)
332
+
333
+ org_combined_translation = ' '.join(org_translated_chunks)
334
+ p2a_combined_translation = ' '.join(p2a_translated_chunks)
335
+ bfn_combined_translation = ' '.join(bfn_translated_chunks)
336
+ mpc_combined_translation = ' '.join(mpc_translated_chunks)
337
+
338
+ orig, best_score = summary_translate(text, org_combined_translation, target_language, session_id)
339
+ orig_output = f"{orig}\n\nScore: {best_score:.2f}"
340
+ plan2align_trans, best_score = summary_translate(text, p2a_combined_translation, target_language, session_id)
341
+ plan2align_output = f"{plan2align_trans}\n\nScore: {best_score:.2f}"
342
+ best_candidate, best_score = summary_translate(text, bfn_combined_translation, target_language, session_id)
343
+ best_of_n_output = f"{best_candidate}\n\nScore: {best_score:.2f}"
344
+ mpc_candidate, best_score = summary_translate(text, mpc_combined_translation, target_language, session_id)
345
+ mpc_output = f"{mpc_candidate}\n\nScore: {best_score:.2f}"
346
 
347
  return orig_output, plan2align_output, best_of_n_output, mpc_output
348