bartman081523 committed on
Commit
8017292
·
1 Parent(s): 105bad1

fix translation errors

Browse files
Files changed (1) hide show
  1. app.py +160 -59
app.py CHANGED
@@ -1,12 +1,17 @@
1
  import json
2
- import datetime
3
  import logging
 
 
 
 
 
4
 
5
  import gradio as gr
6
- from deep_translator import GoogleTranslator, exceptions
7
-
8
  from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index
9
- import unittest
 
 
 
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -22,8 +27,8 @@ def get_current_word_data(client_time_str):
22
  try:
23
  client_time = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
24
  total_seconds = int(client_time.strftime("%H")) * 3600 + \
25
- int(client_time.strftime("%M")) * 60 + \
26
- int(client_time.strftime("%S"))
27
 
28
  # Find the closest key in WORD_INDEX
29
  word_position = min(WORD_INDEX.keys(), key=lambda k: abs(k - total_seconds))
@@ -33,37 +38,83 @@ def get_current_word_data(client_time_str):
33
  logging.error(f"Error processing client time: {e}")
34
  return None, None
35
 
36
-
37
  def get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=True):
38
  """Returns a formatted verse with optional word highlighting."""
39
  chapter_text = TANACH_DATA[book_id]["text"][chapter_id]
40
  flattened_chapter = flatten_text_with_line_breaks(chapter_text)
41
 
42
- if highlight_word:
 
43
  flattened_chapter[verse_id - 1] = \
44
- f"<span class='highlight'>{flattened_chapter[verse_id - 1]}</span>"
45
 
46
  return '<br>'.join(flattened_chapter)
47
 
48
- def translate_verse(hebrew_verse, verse_id, highlight_word=True):
49
- """Translates a Hebrew verse to English and highlights the given word."""
 
50
  try:
51
  translator = GoogleTranslator(source='iw', target='en')
52
- translated_text = translator.translate(hebrew_verse).split('\n')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- if highlight_word:
55
- translated_text[verse_id - 1] = \
56
- f"<span class='highlight'>{translated_text[verse_id - 1]}</span>"
57
 
58
- return '<br>'.join(translated_text)
59
- except exceptions.TranslationError as e:
60
- logging.warning(f"Translation failed: {e}")
61
- return "Translation unavailable"
62
 
63
  # --- Gradio Interface ---
64
 
65
- def update_tanach_display(client_time_str):
66
  """Updates the Gradio interface with client time, verse info, and translations."""
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  word_data, word_position = get_current_word_data(client_time_str)
69
 
@@ -85,49 +136,74 @@ def update_tanach_display(client_time_str):
85
 
86
  # Get and format Hebrew and English verses
87
  hebrew_verse = get_formatted_verse(book_id, chapter_id, verse_id)
88
- english_verse = translate_verse('\n'.join(hebrew_verse.split('<br>')), verse_id)
89
 
90
  return verse_info, hebrew_verse, english_verse
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  with gr.Blocks(css="""
93
- .container {
94
- display: flex;
95
- flex-direction: column;
96
- align-items: center;
97
- font-family: 'Times New Roman', serif;
98
- }
99
- .highlight {
100
- background-color: #FFFF00;
101
- padding: 2px 5px;
102
- border-radius: 5px;
103
- }
104
- #verse-info {
105
- margin-bottom: 20px;
106
- text-align: center;
107
- }
108
- #verses {
109
- display: flex;
110
- flex-direction: row;
111
- justify-content: center;
112
- align-items: flex-start;
113
- gap: 50px;
114
- }
115
- #hebrew-verse {
116
- font-size: 18px;
117
- line-height: 1.5;
118
- margin-bottom: 20px;
119
- text-align: right;
120
- direction: rtl;
121
- }
122
- #english-verse {
123
- font-size: 18px;
124
- line-height: 1.5;
125
- margin-bottom: 20px;
126
- }
 
127
  """) as iface:
128
 
129
  with gr.Row():
130
- client_time_input = gr.Textbox(label="Enter your current time (HH:MM:SS)")
 
131
 
132
  with gr.Row():
133
  verse_info_output = gr.Markdown(label="Verse Information", elem_id="verse-info")
@@ -137,10 +213,35 @@ with gr.Blocks(css="""
137
  hebrew_verse_output = gr.HTML(label="Hebrew Verse", elem_id="hebrew-verse")
138
  english_verse_output = gr.HTML(label="English Translation", elem_id="english-verse")
139
 
140
- client_time_input.submit(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  fn=update_tanach_display,
142
- inputs=[client_time_input],
143
- outputs=[verse_info_output, hebrew_verse_output, english_verse_output]
 
 
 
 
 
 
 
 
 
144
  )
145
 
146
  class TestWordIndex(unittest.TestCase):
 
1
  import json
 
2
  import logging
3
+ import datetime
4
+ import time
5
+ import requests
6
+ import pytz
7
+ import unittest
8
 
9
  import gradio as gr
 
 
10
  from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index
11
+
12
+ import logging
13
+ from deep_translator import GoogleTranslator
14
+ from deep_translator.exceptions import NotValidLength, RequestError
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
27
  try:
28
  client_time = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
29
  total_seconds = int(client_time.strftime("%H")) * 3600 + \
30
+ int(client_time.strftime("%M")) * 60 + \
31
+ int(client_time.strftime("%S"))
32
 
33
  # Find the closest key in WORD_INDEX
34
  word_position = min(WORD_INDEX.keys(), key=lambda k: abs(k - total_seconds))
 
38
  logging.error(f"Error processing client time: {e}")
39
  return None, None
40
 
 
def get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=True):
    """Build the HTML for one chapter, optionally highlighting a single verse.

    The chapter found at ``TANACH_DATA[book_id]["text"][chapter_id]`` is passed
    through ``flatten_text_with_line_breaks``. When ``highlight_word`` is truthy
    and the 1-based ``verse_id`` falls inside the flattened result, that entry
    is wrapped in ``<span class='highlight'>``. All entries are then joined
    with ``<br>`` and returned as a single string.
    """
    lines = flatten_text_with_line_breaks(TANACH_DATA[book_id]["text"][chapter_id])

    if highlight_word:
        target = verse_id - 1  # verse_id is 1-based, the flattened list is 0-based
        if 0 <= target < len(lines):
            # Wrap the entry while it is still a separate item, i.e. before the join.
            lines[target] = f"<span class='highlight'>{lines[target]}</span>"

    return '<br>'.join(lines)
52
 
53
+
def _chunk_text(text, max_length):
    """Split ``text`` into ``(chunk, separator)`` pairs of at most ``max_length`` chars.

    A chunk ends on a line break when one is available inside the window,
    otherwise on a space, and only as a last resort in the middle of a word.
    ``separator`` is the single character consumed at the cut ('' for a hard
    cut and for the final chunk), so joining every chunk with its separator
    reproduces ``text`` exactly.
    """
    pieces = []
    rest = text
    while len(rest) > max_length:
        # Look one character past the limit so a separator sitting exactly at
        # the boundary can still be used as the cut point.
        window = rest[:max_length + 1]
        cut = window.rfind('\n')
        if cut == -1:
            cut = window.rfind(' ')
        if cut == -1:
            pieces.append((rest[:max_length], ''))
            rest = rest[max_length:]
        else:
            pieces.append((rest[:cut], rest[cut]))
            rest = rest[cut + 1:]
    pieces.append((rest, ''))
    return pieces


def translate_verse(hebrew_verse, highlight_word=True):
    """Translate Hebrew text to English, splitting long input into chunks.

    Args:
        hebrew_verse: Text to translate. The caller passes chapter lines joined
            with newlines.
        highlight_word: Unused; kept so existing callers keep working.

    Returns:
        The translated text, with chunks re-joined using the same separator
        they were split on, or a "Translation unavailable: ..." message when
        the translator rejects the request.
    """
    max_length = 2000  # kept below the translator's per-request length limit
    try:
        translator = GoogleTranslator(source='iw', target='en')
        translated_parts = []

        for chunk, separator in _chunk_text(hebrew_verse, max_length):
            if chunk.strip():
                # Guard against a None result so the final join cannot fail.
                translated_parts.append(translator.translate(chunk) or "")
            else:
                # Blank chunks carry nothing to translate; skip the API call.
                translated_parts.append(chunk)
            translated_parts.append(separator)

        return "".join(translated_parts)

    except RequestError as e:
        logging.warning(f"Translation failed: Request Error - {e}")
        return "Translation unavailable: Request Error"
    except NotValidLength as e:
        logging.warning(f"Translation failed: Invalid Length - {e}")
        return "Translation unavailable: Invalid Length"
83
+
84
+
def get_client_time_from_ip(ip_address):
    """Look up the timezone reported by ip-api.com for an IP address.

    Despite the function name, the value returned is the ``"timezone"`` field
    of the API response (a timezone name), not a time.

    Args:
        ip_address: Address to look up, as typed into the IP textbox.

    Returns:
        The timezone string, or None when the request fails, the response is
        not valid JSON, or no timezone is present in the response.
    """
    from urllib.parse import quote

    # The value comes straight from a textbox: escape it so it cannot inject
    # extra path segments or query parameters into the request URL.
    safe_ip = quote(str(ip_address or "").strip(), safe=":")
    api_url = f"http://ip-api.com/json/{safe_ip}"

    try:
        # A timeout keeps an unresponsive API from blocking the UI handler forever.
        response = requests.get(api_url, timeout=5)
        response.raise_for_status()  # Raise an exception for bad status codes
        timezone = response.json().get("timezone")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        logging.warning(f"Error fetching time from IP: {e}")
        return None

    return timezone or None
 
101
 
102
  # --- Gradio Interface ---
103
 
104
+ def update_tanach_display(client_time_str, timezone):
105
  """Updates the Gradio interface with client time, verse info, and translations."""
106
+ try:
107
+ # Get timezone offset using pytz
108
+ tz = pytz.timezone(timezone)
109
+ offset = tz.utcoffset(datetime.datetime.now()).total_seconds() / 3600 # Offset in hours
110
+
111
+ # Adjust client time based on the timezone
112
+ client_time_obj = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
113
+ client_time_obj = client_time_obj.replace(tzinfo=datetime.timezone(datetime.timedelta(hours=offset)))
114
+ client_time_str = client_time_obj.strftime("%H:%M:%S")
115
+ except Exception as e:
116
+ logging.error(f"Error adjusting client time based on timezone: {e}")
117
+ return "Error: Invalid Timezone", "", ""
118
 
119
  word_data, word_position = get_current_word_data(client_time_str)
120
 
 
136
 
137
  # Get and format Hebrew and English verses
138
  hebrew_verse = get_formatted_verse(book_id, chapter_id, verse_id)
139
+ english_verse = translate_verse('\n'.join(hebrew_verse.split('<br>')), highlight_word=False)
140
 
141
  return verse_info, hebrew_verse, english_verse
142
 
def auto_advance(client_time_str, timezone):
    """Yield a refreshed display once per second, driven by the current time.

    Args:
        client_time_str: Unused; kept because the UI wiring passes two inputs.
        timezone: Timezone name used to compute the client's wall-clock time.

    Yields:
        ``(verse_info, hebrew_verse, english_verse)`` as produced by
        ``update_tanach_display``.
    """
    # update_tanach_display treats the time string as wall-clock time in
    # `timezone`, so take "now" in that zone rather than the server's zone.
    try:
        tz = pytz.timezone(timezone)
    except (pytz.UnknownTimeZoneError, AttributeError, TypeError):
        # Fall back to the server clock; update_tanach_display reports the
        # invalid timezone to the user.
        tz = None

    while True:
        current_time = datetime.datetime.now(tz).strftime("%H:%M:%S")
        verse_info, hebrew_verse, english_verse = update_tanach_display(current_time, timezone)
        yield verse_info, hebrew_verse, english_verse
        time.sleep(1)  # Update every second
150
+
151
+ # --- Fetching User's IP ---
152
+
def fetch_user_ip():
    """Fetch a public IP address from the ipify API.

    NOTE(review): the request is made by this process, so the address returned
    is presumably that of the machine running the app rather than of the
    browser user. Confirm this is the intended behaviour.

    Returns:
        The value of the ``"ip"`` field, or None when the request fails or the
        response is not valid JSON.
    """
    try:
        # A timeout keeps an unresponsive API from blocking the UI handler forever.
        response = requests.get('https://api.ipify.org?format=json', timeout=5)
        response.raise_for_status()
        return response.json().get('ip')
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        logging.warning(f"Error fetching user's IP: {e}")
        return None
163
+
164
+ # --- Gradio Interface ---
165
+
166
  with gr.Blocks(css="""
167
+ .container {
168
+ display: flex;
169
+ flex-direction: column;
170
+ align-items: center;
171
+ font-family: 'Times New Roman', serif;
172
+ }
173
+ /* Add this highlight class styling */
174
+ .highlight {
175
+ background-color: #FFFF00; /* Yellow highlight */
176
+ padding: 2px 5px;
177
+ border-radius: 5px;
178
+ }
179
+ #verse-info {
180
+ margin-bottom: 20px;
181
+ text-align: center;
182
+ }
183
+ #verses {
184
+ display: flex;
185
+ flex-direction: row;
186
+ justify-content: center;
187
+ align-items: flex-start;
188
+ gap: 50px;
189
+ }
190
+ #hebrew-verse {
191
+ font-size: 18px;
192
+ line-height: 1.5;
193
+ margin-bottom: 20px;
194
+ text-align: right;
195
+ direction: rtl;
196
+ }
197
+ #english-verse {
198
+ font-size: 18px;
199
+ line-height: 1.5;
200
+ margin-bottom: 20px;
201
+ }
202
  """) as iface:
203
 
204
  with gr.Row():
205
+ client_ip_input = gr.Textbox(label="Enter your IP address (optional)", value="")
206
+ timezone_input = gr.Textbox(label="Timezone", value="", interactive=False) # Added timezone input
207
 
208
  with gr.Row():
209
  verse_info_output = gr.Markdown(label="Verse Information", elem_id="verse-info")
 
213
  hebrew_verse_output = gr.HTML(label="Hebrew Verse", elem_id="hebrew-verse")
214
  english_verse_output = gr.HTML(label="English Translation", elem_id="english-verse")
215
 
216
+ # Fetch user's IP and get timezone
217
+ gr.Button("Fetch IP and Timezone").click(
218
+ fn=fetch_user_ip,
219
+ inputs=[],
220
+ outputs=[client_ip_input],
221
+ queue=False,
222
+ )
223
+
224
+ client_ip_input.change(
225
+ fn=get_client_time_from_ip,
226
+ inputs=[client_ip_input],
227
+ outputs=[timezone_input],
228
+ queue=False,
229
+ )
230
+
231
+ # Update the display with verse information and translations
232
+ client_ip_input.submit(
233
  fn=update_tanach_display,
234
+ inputs=[client_ip_input, timezone_input],
235
+ outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
236
+ queue=False
237
+ )
238
+
239
+ # Start automatic advancement
240
+ gr.Button("Update Position").click(
241
+ fn=auto_advance,
242
+ inputs=[client_ip_input, timezone_input],
243
+ outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
244
+ queue=False
245
  )
246
 
247
  class TestWordIndex(unittest.TestCase):