import gradio as gr
from huggingface_hub import InferenceClient
from paddleocr import PaddleOCR
from PIL import Image

# Hosted inference client used for the text-generation calls below.
# NOTE(review): this repo id names a GGUF export; confirm the hosted
# inference endpoint actually serves it for text generation.
MODEL_ID = "unsloth/DeepSeek-V3-0324-GGUF"
client = InferenceClient(model=MODEL_ID)

# Separators a model may use between list items besides the ASCII comma:
# the full-width comma, the ideographic comma and line breaks.
_WORD_SEPARATORS = str.maketrans("，、\n", ",,,")


# Extract words in base form
def text_inference(text, language):
    """Ask the hosted model for the base form of every word in *text*.

    Args:
        text: Source text written in *language*.
        language: Human-readable language name inserted into the prompt.

    Returns:
        A list of unique base-form words in the order the model produced
        them. Empty or whitespace-only input yields an empty list without
        calling the model.
    """
    if not text or not text.strip():
        return []

    prompt = (
        f"Given the following {language} text, convert each word into its base form. "
        f"Remove all duplicates. Return the base form words as a comma-separated list.\n\n"
        f"Text:\n{text}"
    )
    response = client.text_generation(prompt, max_new_tokens=256, temperature=0.7)

    # Normalise every supported separator to "," so one split handles
    # answers written with CJK punctuation or one word per line.
    normalized = response.translate(_WORD_SEPARATORS)
    words = [w.strip() for w in normalized.split(",") if w.strip()]

    # The prompt asks for de-duplication, but the model's answer is not
    # guaranteed to honour it; enforce it here while keeping first-seen order.
    return list(dict.fromkeys(words))

# Create flashcards
def make_flashcards(words, language):
    """Generate flashcard text for a list of words using the hosted model.

    Args:
        words: Words (strings) to build flashcards for.
        language: Human-readable language name inserted into the prompt.

    Returns:
        The model's response with surrounding whitespace removed, or an
        empty string when *words* is empty.
    """
    # With nothing to describe, the prompt would carry no words at all, so
    # any reply would be unrelated to the user's input; skip the call.
    if not words:
        return ""

    prompt = (
        f"For each {language} word in the list, write a flashcard in this format:\n"
        f"Word: <word>\nDefinition: <definition>\nExample: <sentence>\nTranslation: <translation>\n\n"
        f"Words:\n{', '.join(words)}"
    )
    response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
    return response.strip()

# OCR from image
def ocr_inference(img_path, lang_code):
    """Run PaddleOCR on an image and return the recognized text.

    Args:
        img_path: Image to read, passed straight to ``PaddleOCR.ocr``.
        lang_code: PaddleOCR language code (e.g. "korean", "japan", "ch", "en").

    Returns:
        The recognized text fragments joined by single spaces, or an empty
        string when nothing was recognized.
    """
    ocr = PaddleOCR(use_angle_cls=True, lang=lang_code, use_gpu=False)
    pages = ocr.ocr(img_path, cls=True)

    # An image without detectable text may come back as an empty result or
    # as [None]; treat both as "no text" instead of failing on iteration.
    result = pages[0] if pages else None
    if not result:
        return ""

    # Each entry is indexed as line[1][0] to obtain its text.
    return " ".join(line[1][0] for line in result)

# Combined pipeline
def flashcard_pipeline(text, image, language):
    """Turn typed text or an uploaded image into base-form words and flashcards.

    Args:
        text: Text typed by the user; may be empty or None.
        image: Image to OCR, or a falsy value when no image was supplied.
        language: One of "korean", "japanese", "chinese", "english"
            (case-insensitive). Unknown names fall back to English for OCR.

    Returns:
        A ``(words, flashcards)`` tuple of strings. ``words`` holds one
        base-form word per line. When the input cannot be processed,
        ``words`` is empty and ``flashcards`` carries a message for the user.
    """
    # The language selector may have no selection, which arrives as None.
    if not language:
        return "", "Please select a language."

    lang_code = {
        "korean": "korean",
        "japanese": "japan",
        "chinese": "ch",
        "english": "en",
    }.get(language.lower(), "en")

    text = (text or "").strip()
    if image:
        # The image takes priority, but keep the typed text as a fallback
        # when OCR finds nothing.
        text = ocr_inference(image, lang_code).strip() or text
    if not text:
        return "", "Please provide either text or an image."

    words = text_inference(text, language)
    if not words:
        # Avoid asking the model for flashcards of an empty word list.
        return "", "No words could be extracted from the input."

    flashcards = make_flashcards(words, language)
    return "\n".join(words), flashcards

# Gradio app: text, image and language inputs feed flashcard_pipeline, whose
# two return values fill the two output boxes.
SUPPORTED_LANGUAGES = ["korean", "japanese", "chinese", "english"]

text_input = gr.Textbox(
    label="Input Text (leave blank if using image)",
    lines=4,
    placeholder="e.g. 표현이 서툰 것도 잘못인가요...",
)
# type="filepath" hands the upload to the pipeline as a path on disk.
image_input = gr.Image(type="filepath", label="Upload Image (optional, for OCR)")
language_input = gr.Dropdown(SUPPORTED_LANGUAGES, label="Language")

words_output = gr.Textbox(label="Base Form Words")
flashcards_output = gr.Textbox(label="Flashcards")

demo = gr.Interface(
    fn=flashcard_pipeline,
    inputs=[text_input, image_input, language_input],
    outputs=[words_output, flashcards_output],
    title="📘 Language Flashcard Generator (OCR + LLM)",
    description="Input text or image. It extracts words, finds base forms, and generates flashcards using DeepSeek-V3-0324.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()