Spaces:

dimasdeffieux
/

explain_lang

Sleeping

App Files Files Community

explain_lang / app.py

dimasdeffieux

Update app.py

685f947 verified 8 days ago

raw

history blame contribute delete

2.53 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from paddleocr import PaddleOCR
	from PIL import Image

	# Shared Hugging Face inference client pointed at the hosted DeepSeek-V3 model.
	# text_inference() and make_flashcards() both send their prompts through it.
	# NOTE(review): the repo id ends in "-GGUF"; confirm this repo is actually
	# served for text generation by the inference backend in use.
	client = InferenceClient("unsloth/DeepSeek-V3-0324-GGUF")

	# Extract words in base form
	def text_inference(text, language):
	    """Ask the hosted LLM for the base forms of the words in ``text``.

	    Args:
	        text: Source text written in ``language``.
	        language: Language name interpolated into the prompt.

	    Returns:
	        A list of base-form words in the order the model returned them,
	        with blank entries and repeated words removed. An empty list when
	        ``text`` is empty or whitespace-only.
	    """
	    # Nothing to analyse: skip the remote call rather than letting the model
	    # invent words for an empty prompt.
	    if not text or not text.strip():
	        return []

	    prompt = (
	        f"Given the following {language} text, convert each word into its base form. "
	        f"Remove all duplicates. Return the base form words as a comma-separated list.\n\n"
	        f"Text:\n{text}"
	    )
	    response = client.text_generation(prompt, max_new_tokens=256, temperature=0.7)

	    candidates = (part.strip() for part in response.strip().split(","))
	    # The prompt asks the model to drop duplicates, but nothing guarantees it
	    # does; dict.fromkeys enforces that while keeping first-seen order.
	    return list(dict.fromkeys(word for word in candidates if word))

	# Create flashcards
	def make_flashcards(words, language):
	    """Ask the hosted LLM to write one flashcard per word.

	    Args:
	        words: Sequence of words to build flashcards for.
	        language: Language name interpolated into the prompt.

	    Returns:
	        The model's response with surrounding whitespace stripped, or an
	        empty string when ``words`` is empty.
	    """
	    # With no words there is nothing to describe; prompting anyway would
	    # only invite the model to make up flashcards.
	    if not words:
	        return ""

	    prompt = (
	        f"For each {language} word in the list, write a flashcard in this format:\n"
	        f"Word: <word>\nDefinition: <definition>\nExample: <sentence>\nTranslation: <translation>\n\n"
	        f"Words:\n{', '.join(words)}"
	    )
	    response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
	    return response.strip()

	# OCR from image
	def ocr_inference(img_path, lang_code):
	    """Run PaddleOCR on an image and return the recognised text as one string.

	    Args:
	        img_path: Image location passed straight to ``PaddleOCR.ocr``.
	        lang_code: PaddleOCR language code (for example ``"en"`` or ``"korean"``).

	    Returns:
	        The text of every detected line joined with single spaces, or an
	        empty string when nothing was detected.
	    """
	    ocr = PaddleOCR(use_angle_cls=True, lang=lang_code, use_gpu=False)
	    pages = ocr.ocr(img_path, cls=True)

	    # Only the first result entry is used (a single image is passed in).
	    # That entry can be None or empty when no text is detected, so guard
	    # before iterating instead of raising TypeError.
	    lines = pages[0] if pages else None
	    if not lines:
	        return ""

	    # Each line is indexed as line[1][0] for its text; per the PaddleOCR
	    # result format this is presumably [box, (text, confidence)].
	    return " ".join(line[1][0] for line in lines)

	# Combined pipeline
	def flashcard_pipeline(text, image, language):
	    """Turn typed text or an uploaded image into base-form words and flashcards.

	    When an image is supplied, its OCR output replaces ``text``.

	    Args:
	        text: Text typed by the user; may be empty when an image is used.
	        image: Path of the uploaded image, or a falsy value when absent.
	        language: Language name chosen in the UI; a missing value is
	            treated as ``"english"``.

	    Returns:
	        A ``(words, flashcards)`` tuple of strings: the base-form words one
	        per line, and the generated flashcards. When there is no usable
	        input, ``words`` is empty and ``flashcards`` holds a prompt asking
	        the user for text or an image.
	    """
	    # The language selector may arrive empty (None); fall back to English,
	    # which matches the "en" default used for unknown languages below.
	    language = language or "english"

	    ocr_codes = {
	        "korean": "korean",
	        "japanese": "japan",
	        "chinese": "ch",
	        "english": "en",
	    }
	    lang_code = ocr_codes.get(language.lower(), "en")

	    if image:
	        text = ocr_inference(image, lang_code)
	    # Whitespace-only input is as useless as no input at all.
	    if not text or not text.strip():
	        return "", "Please provide either text or an image."

	    words = text_inference(text, language)
	    flashcards = make_flashcards(words, language)
	    return "\n".join(words), flashcards

	# Gradio app
	demo = gr.Interface(
	    fn=flashcard_pipeline,
	    inputs=[
	        gr.Textbox(
	            label="Input Text (leave blank if using image)",
	            lines=4,
	            placeholder="e.g. 표현이 서툰 것도 잘못인가요...",
	        ),
	        gr.Image(type="filepath", label="Upload Image (optional, for OCR)"),
	        # Preselect a language so the callback does not receive an empty
	        # choice; Korean matches the placeholder example above.
	        gr.Dropdown(
	            ["korean", "japanese", "chinese", "english"],
	            value="korean",
	            label="Language",
	        ),
	    ],
	    outputs=[
	        gr.Textbox(label="Base Form Words"),
	        gr.Textbox(label="Flashcards"),
	    ],
	    title="📘 Language Flashcard Generator (OCR + LLM)",
	    description="Input text or image. It extracts words, finds base forms, and generates flashcards using DeepSeek-V3-0324.",
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()