Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -51,7 +51,7 @@ def clean_extracted_text(text):
|
|
51 |
|
52 |
# Polish the text using a model
|
53 |
def polish_text_with_ai(cleaned_text):
|
54 |
-
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible."
|
55 |
client = Groq(api_key="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")
|
56 |
chat_completion = client.chat.completions.create(
|
57 |
messages=[
|
@@ -172,18 +172,19 @@ if uploaded_file:
|
|
172 |
text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
|
173 |
extracted_text = ' '.join(text_list)
|
174 |
|
175 |
-
|
176 |
-
|
177 |
polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
|
184 |
# Display extracted text
|
185 |
st.subheader("Extracted Text (Cleaned & Polished)")
|
186 |
-
st.markdown(
|
|
|
187 |
|
188 |
# Search functionality
|
189 |
def update_search():
|
|
|
51 |
|
52 |
# Polish the text using a model
|
53 |
def polish_text_with_ai(cleaned_text):
|
54 |
+
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible. Extracted Text : {cleaned_text}"
|
55 |
client = Groq(api_key="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")
|
56 |
chat_completion = client.chat.completions.create(
|
57 |
messages=[
|
|
|
172 |
text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
|
173 |
extracted_text = ' '.join(text_list)
|
174 |
|
175 |
+
# Clean and polish extracted text
|
176 |
+
cleaned_text = clean_extracted_text(extracted_text)
|
177 |
polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
|
178 |
|
179 |
+
# Save results to JSON file
|
180 |
+
result_data = {"extracted_text":extracted_text,"cleaner_text":cleaned_text,"polished_text": polished_text}
|
181 |
+
with open(result_path, 'w') as f:
|
182 |
+
json.dump(result_data, f)
|
183 |
|
184 |
# Display extracted text
|
185 |
st.subheader("Extracted Text (Cleaned & Polished)")
|
186 |
+
st.markdown(cleaned_text, unsafe_allow_html=True)
|
187 |
+
st.markdown(polished_text, unsafe_allow_html=True)
|
188 |
|
189 |
# Search functionality
|
190 |
def update_search():
|