File size: 2,533 Bytes
4d76f31
685f947
c516aac
 
4d76f31
685f947
 
2000233
685f947
2000233
c516aac
 
 
 
2000233
685f947
 
2000233
 
685f947
a378385
c516aac
 
685f947
c516aac
e2cf15d
685f947
 
 
 
 
 
 
 
a20083b
685f947
c516aac
685f947
 
 
 
 
 
 
c516aac
685f947
c516aac
 
685f947
c516aac
 
 
a20083b
685f947
c516aac
 
 
685f947
 
 
c516aac
 
 
 
 
685f947
 
5efe93f
 
c516aac
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
from huggingface_hub import InferenceClient
from paddleocr import PaddleOCR
from PIL import Image

# Use the hosted model
# NOTE(review): InferenceClient points at the HF Inference API; confirm this
# GGUF repo id is actually served for `text_generation` by the hosted backend.
client = InferenceClient("unsloth/DeepSeek-V3-0324-GGUF")

# Extract words in base form
def text_inference(text, language):
    """Ask the hosted LLM for the base (dictionary) form of each word in *text*.

    Args:
        text: Raw input text in the given language.
        language: Human-readable language name, interpolated into the prompt.

    Returns:
        list[str]: Unique, non-empty base-form words in first-seen order.
    """
    prompt = (
        f"Given the following {language} text, convert each word into its base form. "
        f"Remove all duplicates. Return the base form words as a comma-separated list.\n\n"
        f"Text:\n{text}"
    )
    response = client.text_generation(prompt, max_new_tokens=256, temperature=0.7)
    words = [w.strip() for w in response.strip().split(",") if w.strip()]
    # The prompt *asks* the model to remove duplicates, but LLM output is not
    # guaranteed to comply — dedupe locally, preserving first-seen order.
    return list(dict.fromkeys(words))

# Create flashcards
def make_flashcards(words, language):
    prompt = (
        f"For each {language} word in the list, write a flashcard in this format:\n"
        f"Word: <word>\nDefinition: <definition>\nExample: <sentence>\nTranslation: <translation>\n\n"
        f"Words:\n{', '.join(words)}"
    )
    response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
    return response.strip()

# OCR from image
def ocr_inference(img_path, lang_code):
    ocr = PaddleOCR(use_angle_cls=True, lang=lang_code, use_gpu=False)
    result = ocr.ocr(img_path, cls=True)[0]
    return " ".join([line[1][0] for line in result])

# Combined pipeline
def flashcard_pipeline(text, image, language):
    lang_code = {
        "korean": "korean",
        "japanese": "japan",
        "chinese": "ch",
        "english": "en",
    }.get(language.lower(), "en")

    if image:
        text = ocr_inference(image, lang_code)
    if not text:
        return "", "Please provide either text or an image."

    words = text_inference(text, language)
    flashcards = make_flashcards(words, language)
    return "\n".join(words), flashcards

# Gradio app
demo = gr.Interface(
    fn=flashcard_pipeline,
    inputs=[
        gr.Textbox(label="Input Text (leave blank if using image)", lines=4, placeholder="e.g. ν‘œν˜„μ΄ μ„œνˆ° 것도 잘λͺ»μΈκ°€μš”..."),
        gr.Image(type="filepath", label="Upload Image (optional, for OCR)"),
        gr.Dropdown(["korean", "japanese", "chinese", "english"], label="Language"),
    ],
    outputs=[
        gr.Textbox(label="Base Form Words"),
        gr.Textbox(label="Flashcards"),
    ],
    title="πŸ“˜ Language Flashcard Generator (OCR + LLM)",
    description="Input text or image. It extracts words, finds base forms, and generates flashcards using DeepSeek-V3-0324.",
)

if __name__ == "__main__":
    demo.launch()