"""Gradio app that turns text (typed or OCR'd from an image) into flashcards.

Pipeline: optional OCR with PaddleOCR -> base-form word extraction with a
hosted LLM -> flashcard generation with the same LLM.
"""

from functools import lru_cache
from typing import List, Optional, Tuple

import gradio as gr
from huggingface_hub import InferenceClient
from paddleocr import PaddleOCR
from PIL import Image  # not referenced in this file's visible code; kept as-is

# Use the hosted model
client = InferenceClient("unsloth/DeepSeek-V3-0324-GGUF")

# Maps the UI language names to the language codes passed to PaddleOCR.
_OCR_LANG_CODES = {
    "korean": "korean",
    "japanese": "japan",
    "chinese": "ch",
    "english": "en",
}
_DEFAULT_LANGUAGE = "english"
_DEFAULT_OCR_LANG_CODE = "en"


# Extract words in base form
def text_inference(text: str, language: str) -> List[str]:
    """Ask the LLM for the base form of every word in *text*.

    Args:
        text: The source text to analyse.
        language: Human-readable language name inserted into the prompt.

    Returns:
        The base-form words parsed from the model's comma-separated reply,
        stripped of surrounding whitespace, with empty entries and repeated
        words removed (first occurrence wins, order preserved).
    """
    prompt = (
        f"Given the following {language} text, convert each word into its base form. "
        f"Remove all duplicates. Return the base form words as a comma-separated list.\n\n"
        f"Text:\n{text}"
    )
    response = client.text_generation(prompt, max_new_tokens=256, temperature=0.7)
    candidates = (chunk.strip() for chunk in response.strip().split(","))
    # The prompt asks the model to drop duplicates, but nothing guarantees it
    # complies; dict.fromkeys de-duplicates while keeping the original order.
    return list(dict.fromkeys(word for word in candidates if word))


# Create flashcards
def make_flashcards(words: List[str], language: str) -> str:
    """Ask the LLM to write one flashcard per word.

    Args:
        words: Words to build flashcards for.
        language: Human-readable language name inserted into the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when *words* is empty (no request is sent in that case).
    """
    if not words:
        # Nothing to describe; avoid sending a prompt with an empty word list.
        return ""
    prompt = (
        f"For each {language} word in the list, write a flashcard in this format:\n"
        f"Word: \nDefinition: \nExample: \nTranslation: \n\n"
        f"Words:\n{', '.join(words)}"
    )
    response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
    return response.strip()


@lru_cache(maxsize=8)
def _get_ocr_engine(lang_code: str) -> PaddleOCR:
    """Build (once per language code) and return a CPU PaddleOCR engine."""
    # Constructing PaddleOCR is expensive, so engines are reused across
    # requests instead of being rebuilt on every call.
    # NOTE(review): assumes a shared engine is safe under this app's request
    # concurrency - confirm if the Gradio queue is configured for parallelism.
    return PaddleOCR(use_angle_cls=True, lang=lang_code, use_gpu=False)


# OCR from image
def ocr_inference(img_path: str, lang_code: str) -> str:
    """Run OCR on the image at *img_path* and return the recognised text.

    Args:
        img_path: Path of the image file to read.
        lang_code: Language code passed to PaddleOCR (e.g. ``"en"``).

    Returns:
        The recognised text fragments joined by single spaces, or an empty
        string when the OCR result for the image is missing or empty.
    """
    pages = _get_ocr_engine(lang_code).ocr(img_path, cls=True)
    # The first entry holds the result for the (single) image. It can be
    # None/empty when no text is detected, which would otherwise raise a
    # TypeError when iterated.
    if not pages or not pages[0]:
        return ""
    # Each entry's second element starts with the recognised text.
    return " ".join(line[1][0] for line in pages[0])


# Combined pipeline
def flashcard_pipeline(
    text: Optional[str], image: Optional[str], language: Optional[str]
) -> Tuple[str, str]:
    """Run OCR (if an image is given), word extraction and flashcard creation.

    Args:
        text: Text typed by the user; ignored when *image* is provided.
        image: Path of an uploaded image, or a falsy value when none was given.
        language: Language name chosen in the UI. A missing value falls back
            to English; unknown names use the English OCR model.

    Returns:
        A ``(words, flashcards)`` pair: the base-form words joined by
        newlines and the generated flashcard text. When no usable input or
        no words are available, the first item is empty and the second is a
        message for the user.
    """
    # The dropdown may deliver None when nothing is selected.
    language = language or _DEFAULT_LANGUAGE
    lang_code = _OCR_LANG_CODES.get(language.lower(), _DEFAULT_OCR_LANG_CODE)

    if image:
        # An uploaded image takes precedence over typed text.
        text = ocr_inference(image, lang_code)

    if not text or not text.strip():
        return "", "Please provide either text or an image."

    words = text_inference(text, language)
    if not words:
        return "", "No words could be extracted from the input."

    flashcards = make_flashcards(words, language)
    return "\n".join(words), flashcards


# Gradio app
demo = gr.Interface(
    fn=flashcard_pipeline,
    inputs=[
        gr.Textbox(
            label="Input Text (leave blank if using image)",
            lines=4,
            # The line break inside the placeholder is written as an escape;
            # a raw newline is not allowed in a single-quoted string literal.
            placeholder="e.g. \n표현이 서툰 것도 잘못인가요...",
        ),
        gr.Image(type="filepath", label="Upload Image (optional, for OCR)"),
        gr.Dropdown(["korean", "japanese", "chinese", "english"], label="Language"),
    ],
    outputs=[
        gr.Textbox(label="Base Form Words"),
        gr.Textbox(label="Flashcards"),
    ],
    title="📘 Language Flashcard Generator (OCR + LLM)",
    description="Input text or image. It extracts words, finds base forms, and generates flashcards using DeepSeek-V3-0324.",
)

if __name__ == "__main__":
    demo.launch()