Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -40,6 +40,23 @@ def ocr_inference(img, lang):
|
|
40 |
return txts
|
41 |
|
42 |
def make_flashcards(words):
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
print("OUTPUT TOUT OUETOI EIFJ IEFJ",text_inference(text, "korean"))
|
|
|
|
40 |
return txts
|
41 |
|
42 |
def make_flashcards(words, language="korean"):
    """Generate one Anki-style flashcard per word using the chat model.

    Parameters
    ----------
    words : iterable of str
        Words to build flashcards for.
    language : str, optional
        Language of the words, interpolated into the prompt. Defaults to
        "korean" to match the surrounding script's calls; previously this
        was an unresolved free name.

    Returns
    -------
    list[list[str]]
        One entry per word: the colon-separated fields (word, definition,
        example sentence) parsed from the last line of the model's reply.

    NOTE(review): relies on module-level ``tokenizer`` and ``model`` being
    loaded elsewhere in app.py — confirm they exist before this is called.
    """
    cards = []
    for word in words:
        # The prompt asks for colon-separated fields so the reply is parseable.
        system_prompt = (
            f"Given the following {language} word, create an anki flashcard which has the word, the definition, and an example sentence using that word."
            "return the flashcard as each field separated by colons"
        )
        # Bug fix: the original interpolated an unrelated module-level `text`,
        # so every card was built from the same input; use the current `word`.
        user_prompt = f"{system_prompt}\n\nText:\n{word}"

        input_ids = tokenizer.apply_chat_template(
            [{"role": "user", "content": user_prompt}], return_tensors="pt"
        ).to(model.device)
        output_ids = model.generate(input_ids, max_new_tokens=256)
        output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Parse the response: take the last line and split it on colons,
        # dropping empty fields produced by stray separators.
        last_line = output_text.strip().split("\n")[-1]
        card = [field.strip() for field in last_line.split(":") if field.strip()]
        # Bug fix: list has no .add() (that's a set method); use append().
        cards.append(card)
    return cards
|
60 |
|
61 |
# Debug output: run the text-inference pipeline, then the flashcard builder.
inference_result = text_inference(text, "korean")
print("OUTPUT TOUT OUETOI EIFJ IEFJ", inference_result)

flashcards = make_flashcards(words)
print("flashcard output:", flashcards)
|