Update streamlit_app.py
streamlit_app.py  CHANGED  (+17 -13)
@@ -2,6 +2,10 @@ import streamlit as st
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 import torch
 
+# Hugging Face model repository path
+MODEL_REPO = "zhixiusue/EduTubeNavigator"
+
+# ID → label mapping (user-defined)
 id_to_label = {
     0: 'O',
     1: 'B-TOPIC',
@@ -16,8 +20,8 @@ id_to_label = {
 
 @st.cache_resource
 def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(
-    model = AutoModelForTokenClassification.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
+    model = AutoModelForTokenClassification.from_pretrained(MODEL_REPO)
     return tokenizer, model
 
 tokenizer, model = load_model()
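Note on this hunk: pointing both from_pretrained calls at a single MODEL_REPO constant keeps the tokenizer and model in sync, and @st.cache_resource makes the download and load happen once per server process instead of on every Streamlit rerun. A minimal sketch of the same load outside Streamlit, using only the repo name visible in the diff:

    from transformers import AutoTokenizer, AutoModelForTokenClassification

    MODEL_REPO = "zhixiusue/EduTubeNavigator"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
    model = AutoModelForTokenClassification.from_pretrained(MODEL_REPO)
    model.eval()  # inference only, matching the app's predict()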
@@ -28,8 +32,8 @@ def predict(text, model, tokenizer, id_to_label):
     model.eval()
     with torch.no_grad():
         outputs = model(**inputs)
-
-
+    logits = outputs.logits
+    predictions = torch.argmax(logits, dim=-1)
 
     word_ids = inputs.word_ids(batch_index=0)
     pred_labels = []
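The two added lines make the decoding step explicit: outputs.logits has shape (batch, seq_len, num_labels), and an argmax over the last dimension picks one label id per subword token. A toy sketch of that step and of the word_ids alignment the following lines rely on (shapes are illustrative):

    import torch

    logits = torch.randn(1, 6, 9)               # fake (batch, seq_len, num_labels) output
    predictions = torch.argmax(logits, dim=-1)  # (1, 6): one label id per subword
    # inputs.word_ids(batch_index=0) returns, for each subword position, the
    # index of the source word (None for special tokens like [CLS]/[SEP]);
    # predict() uses it to keep one label per original word.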
@@ -47,10 +51,9 @@ def predict(text, model, tokenizer, id_to_label):
 
 def post_process(tokens, labels):
     words, word_labels = [], []
-    current_word =
-    current_label = None
+    current_word, current_label = None, None
     for token, label in zip(tokens, labels):
-        if token in [
+        if token in ['[CLS]', '[SEP]', '[PAD]']:
             continue
         if token.startswith("##"):
             current_word += token[2:]
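post_process reverses WordPiece tokenization: pieces prefixed with "##" are glued back onto the current word, special tokens are skipped, and each merged word keeps its first subword's label. The tail of the function falls outside the hunk, so the flush logic in this standalone sketch is an assumption:

    tokens = ['[CLS]', 'learn', 'trans', '##form', '##ers', '[SEP]']
    labels = ['O', 'B-TOPIC', 'B-TOPIC', 'I-TOPIC', 'I-TOPIC', 'O']

    words, word_labels = [], []
    current_word, current_label = None, None
    for token, label in zip(tokens, labels):
        if token in ['[CLS]', '[SEP]', '[PAD]']:
            continue
        if token.startswith('##'):
            current_word += token[2:]        # continuation piece: extend the word
        else:
            if current_word is not None:     # flush the finished word (assumed)
                words.append(current_word)
                word_labels.append(current_label)
            current_word, current_label = token, label
    if current_word is not None:             # flush the last word (assumed)
        words.append(current_word)
        word_labels.append(current_label)

    print(words)        # ['learn', 'transformers']
    print(word_labels)  # ['O', 'B-TOPIC']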
@@ -80,13 +83,14 @@ def extract_entities(aligned_result):
         if prefix == "B":
             if current_entity:
                 entities.append({"entity": current_entity, "text": current_text})
-            current_entity
+            current_entity = entity_type
+            current_text = word
         elif prefix == "I" and current_entity == entity_type:
             current_text += word
         else:
             if current_entity:
                 entities.append({"entity": current_entity, "text": current_text})
-            current_entity, current_text =
+            current_entity, current_text = None, ""
     if current_entity:
         entities.append({"entity": current_entity, "text": current_text})
     return entities
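With the assignments restored, extract_entities is a standard BIO decoder: "B-" opens a span, "I-" of the same type extends it, and anything else closes it. A self-contained version for experimentation; the (word, label) pair shape of aligned_result is an assumption, since align_words_labels is not shown in the diff:

    def decode_bio(pairs):
        entities, current_entity, current_text = [], None, ""
        for word, label in pairs:
            prefix, _, entity_type = label.partition("-")
            if prefix == "B":
                if current_entity:
                    entities.append({"entity": current_entity, "text": current_text})
                current_entity, current_text = entity_type, word
            elif prefix == "I" and current_entity == entity_type:
                current_text += word
            else:
                if current_entity:
                    entities.append({"entity": current_entity, "text": current_text})
                current_entity, current_text = None, ""
        if current_entity:
            entities.append({"entity": current_entity, "text": current_text})
        return entities

    print(decode_bio([("Python", "B-TOPIC"), ("basics", "I-TOPIC"), ("fast", "O")]))
    # [{'entity': 'TOPIC', 'text': 'Pythonbasics'}]

Note that current_text += word concatenates with no separator, which suits Korean words; for space-separated languages a " " join would read better.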
@@ -95,17 +99,17 @@ def extract_entities(aligned_result):
 st.title("🎯 Learning Condition Extractor")
 st.write("Extracts the conditions (TOPIC, STYLE, LENGTH, LANGUAGE) from the user's learning-goal sentence.")
 
-user_input = st.text_input("Enter your learning goal
+user_input = st.text_input("💬 Enter your learning goal", value="I want to learn from YouTube videos, hands-on, within 30 minutes")
 
-if st.button("
+if st.button("🚀 Start extraction"):
     tokens, pred_labels = predict(user_input, model, tokenizer, id_to_label)
     words, word_labels = post_process(tokens, pred_labels)
     aligned = align_words_labels(words, word_labels)
     entities = extract_entities(aligned)
 
-    result_dict = {
+    result_dict = {"TOPIC": None, "STYLE": None, "LENGTH": None, "LANGUAGE": None}
     for ent in entities:
-        result_dict[ent[
+        result_dict[ent["entity"]] = ent["text"]
 
     st.subheader("📝 Extracted conditions")
     st.json(result_dict)
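The handler funnels whatever the model found into a fixed four-key dict, so st.json always renders all four conditions, with None for any the model missed. One edge worth noting: result_dict[ent["entity"]] raises a KeyError if the model ever emits an entity type outside those four keys, so a membership guard is a cheap safety net (a sketch, not part of the diff):

    result_dict = {"TOPIC": None, "STYLE": None, "LENGTH": None, "LANGUAGE": None}
    entities = [{"entity": "TOPIC", "text": "Python"},      # illustrative model output
                {"entity": "LENGTH", "text": "30 minutes"}]
    for ent in entities:
        if ent["entity"] in result_dict:                    # guard against unseen labels
            result_dict[ent["entity"]] = ent["text"]
    print(result_dict)
    # {'TOPIC': 'Python', 'STYLE': None, 'LENGTH': '30 minutes', 'LANGUAGE': None}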
|