Spaces:
Paused
Paused
themissingCRAM
committed on
Commit
·
c69bef9
1
Parent(s):
0191153
new dataset
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from dotenv import load_dotenv
|
|
6 |
import datasets
|
7 |
from langchain.docstore.document import Document
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
|
9 |
import chromadb
|
10 |
from chromadb.utils import embedding_functions
|
11 |
|
@@ -90,14 +92,15 @@ class RetrieverTool(Tool):
|
|
90 |
|
91 |
|
92 |
if __name__ == "__main__":
|
93 |
-
knowledge_base = datasets.load_dataset("MuskumPillerum/General-Knowledge")
|
94 |
-
|
95 |
-
source_docs = [
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
]
|
|
|
101 |
text_splitter = RecursiveCharacterTextSplitter(
|
102 |
chunk_size=500,
|
103 |
chunk_overlap=50,
|
@@ -106,40 +109,44 @@ if __name__ == "__main__":
|
|
106 |
separators=["\n\n", "\n", ".", " ", ""],
|
107 |
)
|
108 |
docs_processed = text_splitter.split_documents(source_docs)
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
6 |
import datasets
|
7 |
from langchain.docstore.document import Document
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
|
10 |
+
from langchain_community.document_loaders import HuggingFaceDatasetLoader
|
11 |
import chromadb
|
12 |
from chromadb.utils import embedding_functions
|
13 |
|
|
|
92 |
|
93 |
|
94 |
if __name__ == "__main__":
|
95 |
+
# knowledge_base = datasets.load_dataset("MuskumPillerum/General-Knowledge", split="train")
|
96 |
+
# print(knowledge_base.column_names)
|
97 |
+
# source_docs = [
|
98 |
+
# Document(
|
99 |
+
# page_content=doc["Answer"], metadata={"question": doc["Question"]}
|
100 |
+
# )
|
101 |
+
# for doc in knowledge_base
|
102 |
+
# ]
|
103 |
+
source_docs = HuggingFaceDatasetLoader("MuskumPillerum/General-Knowledge", "Answer").load()
|
104 |
text_splitter = RecursiveCharacterTextSplitter(
|
105 |
chunk_size=500,
|
106 |
chunk_overlap=50,
|
|
|
109 |
separators=["\n\n", "\n", ".", " ", ""],
|
110 |
)
|
111 |
docs_processed = text_splitter.split_documents(source_docs)
|
112 |
+
print(docs_processed)
|
113 |
+
retriever_tool = RetrieverTool(docs_processed)
|
114 |
+
model = HfApiModel(
|
115 |
+
# model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
116 |
+
model_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
|
117 |
+
token=os.getenv("agentic_rag_inference_token"),
|
118 |
+
)
|
119 |
+
|
120 |
+
agent = CodeAgent(
|
121 |
+
tools=[retriever_tool],
|
122 |
+
model=model,
|
123 |
+
max_steps=10,
|
124 |
+
verbosity_level=10,
|
125 |
+
)
|
126 |
+
|
127 |
+
|
128 |
+
def enter_message(new_message, conversation_history):
    """Record the user's message, then stream the agent's replies into the chat.

    This is a generator. Every yield is the pair ``("", conversation_history)``,
    matching the ``[textbox, chatbot]`` outputs this handler is registered
    with: the empty string goes to the textbox output and the growing history
    goes to the chatbot output.

    Args:
        new_message: Text taken from the textbox input.
        conversation_history: Chat message list from the chatbot input; it is
            appended to in place.
    """
    user_turn = gr.ChatMessage(role="user", content=new_message)
    conversation_history.append(user_turn)
    # First yield shows the user's own message before the agent starts working.
    yield "", conversation_history

    for agent_turn in stream_to_gradio(agent, new_message):
        conversation_history.append(agent_turn)
        yield "", conversation_history
|
134 |
+
|
135 |
+
|
136 |
+
def clear_message(chat_history: list) -> tuple[list, str]:
    """Empty the conversation and reset the textbox.

    The handler is registered with two outputs (``[chatbot, textbox]``), so it
    must return one value per output. The previous implementation returned the
    result of ``list.clear()``, which is always ``None``.

    Args:
        chat_history: Current chat message list from the chatbot input. It is
            cleared in place; ``None`` is tolerated.

    Returns:
        A ``(history, text)`` pair: an empty list for the chatbot output and
        an empty string for the textbox output.
    """
    if chat_history:
        # Keep the original in-place clearing for any caller holding this list.
        chat_history.clear()
    return [], ""
|
138 |
+
|
139 |
+
|
140 |
+
with gr.Blocks() as b:
|
141 |
+
chatbot = gr.Chatbot(type="messages", height=1000)
|
142 |
+
textbox = gr.Textbox(lines=3, label="")
|
143 |
+
enter_button = gr.Button("enter")
|
144 |
+
stop_generating_button = gr.Button("stop generating")
|
145 |
+
clear_messages_button = gr.Button("clear messages")
|
146 |
+
reply_button_click_event = enter_button.click(enter_message, [textbox, chatbot], [textbox, chatbot])
|
147 |
+
textbox.submit(enter_message, [textbox, chatbot], [textbox, chatbot])
|
148 |
+
clear_messages_button.click(fn=clear_message, inputs=chatbot, outputs=[chatbot, textbox],
|
149 |
+
cancels=[reply_button_click_event])
|
150 |
+
stop_generating_button.click(cancels=[reply_button_click_event])
|
151 |
+
|
152 |
+
b.launch()
|