Spaces:

beyoru
/

Demo_reading_table

Sleeping

App Files Files Community

beyoru committed on Mar 22

Commit

74d34f4

verified ·

1 Parent(s): 12f9bba

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -34

app.py CHANGED Viewed

@@ -31,48 +31,59 @@ row_texts = df.apply(lambda row: " | ".join(row.astype(str)), axis=1)
 row_embeddings = embedding_model.encode(row_texts.tolist(), convert_to_tensor=True)
 # Load mô hình Qwen và tokenizer cho việc tạo phản hồi
-fc_model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-3B-Instruct', torch_dtype=torch.float16)\
-            #.to("cuda")
 fc_tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-3B-Instruct')
 # --------------------------
 # Hàm tạo phản hồi streaming theo thời gian thực
 # --------------------------
-def generate_response(user_query: str):
     """
     Hàm này sẽ:
-      - Tính embedding cho câu truy vấn của người dùng.
-      - Chọn ra top 3 cột và top 10 dòng phù hợp từ dữ liệu.
-      - Tạo system prompt bao gồm bảng dữ liệu đã được format bằng tabulate.
-      - Sử dụng TextIteratorStreamer để stream phản hồi từ mô hình theo thời gian thực.
     """
-    # Tính embedding cho câu truy vấn
-    question_embedding = embedding_model.encode(user_query, convert_to_tensor=True)
     # Chọn top 7 cột phù hợp
-    k = 7
-    column_similarities = util.cos_sim(question_embedding, column_embeddings)[0]
     best_column_indices = torch.topk(column_similarities, k).indices.tolist()
     best_column_names = [column_names[i] for i in best_column_indices]
     # Chọn top 10 dòng phù hợp
-    row_similarities = util.cos_sim(question_embedding, row_embeddings).squeeze(0)
     m = 10
     best_row_indices = torch.topk(row_similarities, m).indices.tolist()
     filtered_df = df.iloc[best_row_indices][best_column_names]
-    # Format bảng dữ liệu sử dụng tabulate
     from tabulate import tabulate
     table_text = tabulate(filtered_df, headers=best_column_names, tablefmt="grid")
-    # Tạo system prompt chứa thông tin bảng dữ liệu (feat GPT-4)
     system_prompt = f"""\
 **Notes: Always respond in Vietnamese**
-Bạn là một trợ lý báo cáo sản xuất thông minh.
-**Chỉ báo cáo về bảng dưới đây nếu người dùng yêu cầu, nếu không thì cứ giao tiếp bình thường.**
-Dưới đây là dữ liệu bạn cần phân tích và tổng hợp dữ liệu một cách rõ ràng, dễ hiểu:
 🔹 Các cột dữ liệu liên quan: {', '.join(best_column_names)}
 🔹 Bảng dữ liệu:
 {table_text}
@@ -92,16 +103,23 @@ Nếu có thể, đề xuất giải pháp hoặc hành động tiếp theo.
 🚀 "Nếu duy trì tốc độ này, sản lượng tháng có thể vượt kế hoạch 10%."
 🚀 "Không có gì, nếu bạn cần thêm thông tin chi tiết hãy nói cho tôi biết nhé."
 """
-    messages = [
-        {'role': 'system', 'content': system_prompt},
-        {'role': 'user', 'content': user_query}
-    ]
     response_template = fc_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    response_inputs = fc_tokenizer(response_template, return_tensors="pt")\
-            #.to("cuda")
-    # Dùng TextIteratorStreamer để stream phản hồi
     streamer = TextIteratorStreamer(fc_tokenizer, skip_prompt=True, skip_special_tokens=True)
     thread = threading.Thread(
@@ -120,22 +138,21 @@ Nếu có thể, đề xuất giải pháp hoặc hành động tiếp theo.
         collected_text += new_text
         yield collected_text
-# --------------------------
-# Hàm giao diện chat
-# --------------------------
 def chat_interface(user_message, history):
     """
-    Hàm này sẽ:
-      - Thêm tin nhắn của người dùng vào lịch sử chat (dưới dạng cặp [tin nhắn người dùng, phản hồi AI]).
-      - Stream phản hồi từ mô hình theo thời gian thực và cập nhật lịch sử.
     """
     history.append([user_message, ""])
     yield "", history
-    for partial_response in generate_response(user_message):
         history[-1][1] = partial_response
         yield "", history
 # --------------------------
 # Xây dựng giao diện Gradio với 2 tab: Chat và Production Data Sample
 # --------------------------
@@ -154,4 +171,4 @@ with gr.Blocks() as demo:
         with gr.TabItem("Production Data Sample"):
             # gr.Markdown("Dưới đây là bảng **production_data** mẫu:")
             production_table = gr.Dataframe(value=production_data_df, label="Production Data Sample")
-    demo.launch()

 row_embeddings = embedding_model.encode(row_texts.tolist(), convert_to_tensor=True)
 # Load mô hình Qwen và tokenizer cho việc tạo phản hồi
+fc_model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-3B-Instruct', torch_dtype=torch.float16)
+    #.to("cuda")
 fc_tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-3B-Instruct')
 # --------------------------
 # Hàm tạo phản hồi streaming theo thời gian thực
 # --------------------------
+def generate_response(user_query: str, history):
     """
     Hàm này sẽ:
+      - Lấy lượt đối thoại cuối cùng trong history (num_exchanges_for_embedding = 1) để tính embedding; nếu lượt đó chưa có phản hồi thì dùng user_query.
+      - Dựa trên embedding này, chọn ra top 10 cột và top 10 dòng phù hợp.
+      - Nạp lịch sử (ví dụ 10 lượt đối thoại gần nhất) vào messages để mô hình có "ký ức".
+      - Sử dụng TextIteratorStreamer để stream phản hồi từ mô hình.
     """
+    # --- Phần tính embedding chỉ dùng 1 lượt đối thoại gần nhất (num_exchanges_for_embedding = 1) ---
+    num_exchanges_for_embedding = 1
+    embedding_history = history[-num_exchanges_for_embedding:] if len(history) >= num_exchanges_for_embedding else history
+    # Ghép các lượt đối thoại (chỉ những lượt đã có phản hồi) thành chuỗi context
+    conversation_context = " ".join(
+        [f"User: {turn[0]} Assistant: {turn[1]}" for turn in embedding_history if turn[1]]
+    )
+    if conversation_context.strip() == "":
+        conversation_context = user_query
+    # Tính embedding cho context
+    context_embedding = embedding_model.encode(conversation_context, convert_to_tensor=True)
+    # --- Chọn dữ liệu từ DataFrame dựa trên embedding ---
     # Chọn top 10 cột phù hợp (k = 10)
+    k = 10
+    column_similarities = util.cos_sim(context_embedding, column_embeddings)[0]
     best_column_indices = torch.topk(column_similarities, k).indices.tolist()
     best_column_names = [column_names[i] for i in best_column_indices]
     # Chọn top 10 dòng phù hợp
+    row_similarities = util.cos_sim(context_embedding, row_embeddings).squeeze(0)
     m = 10
     best_row_indices = torch.topk(row_similarities, m).indices.tolist()
     filtered_df = df.iloc[best_row_indices][best_column_names]
+    # Format bảng dữ liệu dùng tabulate
     from tabulate import tabulate
     table_text = tabulate(filtered_df, headers=best_column_names, tablefmt="grid")
+    # --- Tạo system prompt chứa thông tin bảng dữ liệu ---
     system_prompt = f"""\
 **Notes: Always respond in Vietnamese**
+Bạn là một trợ lý báo cáo sản xuất thông minh đồng thời là một người bạn thân thiện.
+**Chỉ báo cáo về bảng dưới đây nếu người dùng yêu cầu, nếu không thì cứ giao tiếp tự nhiên và đừng đề cập gì đến bảng.**
+Dưới đây là dữ liệu bạn cần phân tích và tổng hợp:
 🔹 Các cột dữ liệu liên quan: {', '.join(best_column_names)}
 🔹 Bảng dữ liệu:
 {table_text}
 🚀 "Nếu duy trì tốc độ này, sản lượng tháng có thể vượt kế hoạch 10%."
 🚀 "Không có gì, nếu bạn cần thêm thông tin chi tiết hãy nói cho tôi biết nhé."
 """
+    print(table_text)
+    # --- Nạp lịch sử đối thoại vào messages để mô hình có "ký ức" ---
+    num_exchanges_for_messages = 10
+    messages_history = history[-num_exchanges_for_messages:] if len(history) > num_exchanges_for_messages else history
+    messages = [{'role': 'system', 'content': system_prompt}]
+    for turn in messages_history[:-1]:
+        messages.append({'role': 'user', 'content': turn[0]})
+        messages.append({'role': 'assistant', 'content': turn[1]})
+    # Thêm lượt hiện tại (chỉ tin nhắn của user, chưa có phản hồi)
+    messages.append({'role': 'user', 'content': messages_history[-1][0]})
     response_template = fc_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    response_inputs = fc_tokenizer(response_template, return_tensors="pt")
+    #.to("cuda")
+    # --- Stream phản hồi từ mô hình ---
     streamer = TextIteratorStreamer(fc_tokenizer, skip_prompt=True, skip_special_tokens=True)
     thread = threading.Thread(
         collected_text += new_text
         yield collected_text
 def chat_interface(user_message, history):
     """
+    Hàm này:
+      - Thêm tin nhắn mới của người dùng vào history.
+      - Gọi generate_response với history (nạp cả lịch sử vào messages và dùng 1 lượt đối thoại gần nhất cho embedding).
+      - Stream phản hồi từ mô hình và cập nhật history.
     """
     history.append([user_message, ""])
     yield "", history
+    for partial_response in generate_response(user_message, history):
         history[-1][1] = partial_response
         yield "", history
 # --------------------------
 # Xây dựng giao diện Gradio với 2 tab: Chat và Production Data Sample
 # --------------------------
         with gr.TabItem("Production Data Sample"):
             # gr.Markdown("Dưới đây là bảng **production_data** mẫu:")
             production_table = gr.Dataframe(value=production_data_df, label="Production Data Sample")
+    demo.launch(debug=True)