dimasdeffieux committed
Commit c516aac · verified · 1 Parent(s): 1ede285

Update app.py

Files changed (1): app.py (+56 -74)
app.py CHANGED
@@ -1,88 +1,70 @@
-import requests
-from paddleocr import PaddleOCR, draw_ocr
-from PIL import Image
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch

-img = "input_data/ocr_input/korean1.jpg"
-text = "표현이 서툰 것도 잘못인가요. 나 차가운 도시에 따뜻한 여잔데. 그냥 좋아한단 말도 안 되는가요. 솔직하게 난 말하고 싶어요"
-model_id = "deepseek-ai/deepseek-llm-7b-chat"

-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)

 def text_inference(text, language):
-    system_prompt = (
-        f"Given the following {language} text, convert each word into their base form. Remove all duplicates. Return the base form words as a comma-separated list, and nothing else."
     )
-    user_prompt = f"{system_prompt}\n\nText:\n{text}"
-
-    input_ids = tokenizer.apply_chat_template([{"role": "user", "content": user_prompt}], return_tensors="pt").to(model.device)
-    output_ids = model.generate(input_ids, max_new_tokens=256)
-    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    # Parse response: take last line, split by commas
-    last_line = output_text.strip().split("\n")[-1]
-    words = [w.strip() for w in last_line.split(",") if w.strip()]
     return words

-def ocr_inference(img, lang):
-    ocr = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=False)
-    img_path = img
-    result = ocr.ocr(img_path, cls=True)[0]
-    image = Image.open(img_path).convert('RGB')
-    boxes = [line[0] for line in result]
-    txts = [line[1][0] for line in result]
-    scores = [line[1][1] for line in result]
-    return txts
-
 def make_flashcards(words, language):
-
-    system_prompt = (
-        f"for each {language} word in the list, write a flashcard in this format: the word, then its definition, then an example sentence using the word, and then a translation of example sentence"
     )
-    user_prompt = f"{system_prompt}\n\nWords:\n{words}"
-
-    input_ids = tokenizer.apply_chat_template([{"role": "user", "content": user_prompt}], return_tensors="pt").to(model.device)
-    output_ids = model.generate(input_ids, max_new_tokens=256)
-    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    # Parse response: take last line, split by commas
-    last_line = output_text.strip().split("\n")[-1]
-    output = [w.strip() for w in last_line.split(":") if w.strip()]
-    return output
-
-# words=text_inference(text, "korean")
-# print("OUTPUT TOUT OUETOI EIFJ IEFJ",words)
-# print("flashcard output:",make_flashcards(words, "korean"))
-
-# print("OCR OUTPUT: ", ocr_inference(img, "korean"))
-# words=text_inference(text, "korean")
-# print("TEXT INPUT: ", text)
-# print("WORD PARSING: ",words)
-# print("flashcard output:",make_flashcards(words, "korean"))

-examples = [
-    [{"text": "@RolmOCR OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
-    [{"text": "@RolmOCR Explain the Ad in Detail", "files": ["examples/videoplayback.mp4"]}],
-    [{"text": "@RolmOCR OCR the Image", "files": ["rolm/3.jpeg"]}],
-    [{"text": "Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
-]

-demo = gr.ChatInterface(
-    fn=ocr_inference,
-    description="# **Multimodal OCR `@RolmOCR and Default Qwen2VL OCR`**",
-    examples=examples,
-    textbox=gr.MultimodalTextbox(
-        label="Query Input",
-        file_types=["image", "video"],
-        file_count="multiple",
-        placeholder="Use tag @RolmOCR for RolmOCR, or leave blank for default Qwen2VL OCR"
-    ),
-    stop_btn="Stop Generation",
-    multimodal=True,
-    cache_examples=False,
 )

-demo.launch(debug=True)
 
 import gradio as gr
+from llama_cpp import Llama
+from paddleocr import PaddleOCR
+from PIL import Image

+# Load GGUF model
+llm = Llama(
+    model_path="./deepseek-v3-0324.Q4_K_M.gguf",  # Make sure this file is in your repo
+    n_ctx=2048,
+    n_threads=8,
+    n_gpu_layers=20  # Set to 0 if you are on CPU-only
+)

+# OCR function
+def ocr_inference(img, lang):
+    ocr = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=False)
+    result = ocr.ocr(img, cls=True)[0]
+    txts = [line[1][0] for line in result]
+    return " ".join(txts)

+# Step 1: Convert text to base form words
 def text_inference(text, language):
+    prompt = (
+        f"Given the following {language} text, convert each word into its base form. "
+        f"Remove all duplicates. Return the base form words as a comma-separated list.\n\n"
+        f"Text:\n{text}"
     )
+    response = llm(prompt, max_tokens=256, stop=["</s>"])
+    output_text = response["choices"][0]["text"].strip()
+    words = [w.strip() for w in output_text.split(",") if w.strip()]
     return words

+# Step 2: Generate flashcards for those words
 def make_flashcards(words, language):
+    prompt = (
+        f"For each {language} word in the list, write a flashcard in this format:\n"
+        f"word - definition - example sentence - translated sentence.\n\n"
+        f"Words:\n{', '.join(words)}"
     )
+    response = llm(prompt, max_tokens=512, stop=["</s>"])
+    return response["choices"][0]["text"].strip()

+# Wrapper logic to handle OCR or text
+def flashcard_pipeline(text, image, language):
+    if image:
+        text = ocr_inference(image, language)
+    if not text:
+        return "", "Please provide either text or an image."
+    words = text_inference(text, language)
+    flashcards = make_flashcards(words, language)
+    return "\n".join(words), flashcards

+# Gradio UI
+demo = gr.Interface(
+    fn=flashcard_pipeline,
+    inputs=[
+        gr.Textbox(label="Input Text (leave empty to use image)", lines=4, placeholder="Type or paste sentence here..."),
+        gr.Image(label="Upload Image for OCR (optional)", type="filepath"),
+        gr.Dropdown(choices=["korean", "japan", "french", "ch"], label="Language (for OCR and LLM)")
+    ],
+    outputs=[
+        gr.Textbox(label="Base Form Words"),
+        gr.Textbox(label="Flashcards"),
+    ],
+    title="Language Flashcard Generator (with OCR + DeepSeek GGUF)",
+    description="Either input text or upload an image. The app will extract words, lemmatize them, and generate flashcards."
 )

+if __name__ == "__main__":
+    demo.launch()
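
The refactored pipeline can be sanity-checked without launching the Gradio UI by calling the functions directly. A minimal sketch, not part of the commit: it assumes the GGUF file referenced by model_path is present and that app.py is importable (demo.launch() is guarded by the __main__ check, so importing does not start the server). The sample sentence and image path are taken from the removed test code.

# smoke_test.py - hypothetical local check for the refactored app
from app import flashcard_pipeline

# Text-only path: lemmatize a Korean sentence, then generate flashcards.
# Both return values are strings (newline-joined words, flashcard text).
words, cards = flashcard_pipeline("솔직하게 난 말하고 싶어요", None, "korean")
print("Base forms:\n" + words)
print("Flashcards:\n" + cards)

# OCR path: pass an image filepath instead of text. Note that the dropdown
# choices ("korean", "japan", "french", "ch") are PaddleOCR language codes,
# and they are also interpolated verbatim into the LLM prompts.
# words, cards = flashcard_pipeline("", "input_data/ocr_input/korean1.jpg", "korean")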