themissingCRAM committed on
Commit
c69bef9
·
1 Parent(s): 0191153

new dataset

Browse files
Files changed (1) hide show
  1. app.py +52 -45
app.py CHANGED
@@ -6,6 +6,8 @@ from dotenv import load_dotenv
6
  import datasets
7
  from langchain.docstore.document import Document
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
9
  import chromadb
10
  from chromadb.utils import embedding_functions
11
 
@@ -90,14 +92,15 @@ class RetrieverTool(Tool):
90
 
91
 
92
  if __name__ == "__main__":
93
- knowledge_base = datasets.load_dataset("MuskumPillerum/General-Knowledge")
94
-
95
- source_docs = [
96
- Document(
97
- page_content=doc["Answer"], metadata={"question": doc["Question"]}
98
- )
99
- for doc in knowledge_base
100
- ]
 
101
  text_splitter = RecursiveCharacterTextSplitter(
102
  chunk_size=500,
103
  chunk_overlap=50,
@@ -106,40 +109,44 @@ if __name__ == "__main__":
106
  separators=["\n\n", "\n", ".", " ", ""],
107
  )
108
  docs_processed = text_splitter.split_documents(source_docs)
109
- # retriever_tool = RetrieverTool(docs_processed)
110
- # model = HfApiModel(
111
- # # model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
112
- # model_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
113
- # token=os.getenv("agentic_rag_inference_token"),
114
- # )
115
- #
116
- # agent = CodeAgent(
117
- # tools=[retriever_tool],
118
- # model=model,
119
- # max_steps=10,
120
- # verbosity_level=10,
121
- # )
122
- # def enter_message(new_message, conversation_history):
123
- # conversation_history.append(gr.ChatMessage(role="user", content=new_message))
124
- # yield "", conversation_history
125
- # for msg in stream_to_gradio(agent, new_message):
126
- # conversation_history.append(msg)
127
- # yield "", conversation_history
128
- #
129
- #
130
- # def clear_message(chat_history: list):
131
- # return chat_history.clear()
132
- #
133
- # with gr.Blocks() as b:
134
- # chatbot = gr.Chatbot(type="messages", height=1000)
135
- # textbox = gr.Textbox(lines=3, label="")
136
- # enter_button = gr.Button("enter")
137
- # stop_generating_button = gr.Button("stop generating")
138
- # clear_messages_button = gr.Button("clear messages")
139
- # reply_button_click_event = enter_button.click(enter_message, [textbox, chatbot], [textbox, chatbot])
140
- # textbox.submit(enter_message, [textbox, chatbot], [textbox, chatbot])
141
- # clear_messages_button.click(fn=clear_message, inputs=chatbot, outputs=[chatbot, textbox],
142
- # cancels=[reply_button_click_event])
143
- # stop_generating_button.click(cancels=[reply_button_click_event])
144
- #
145
- # b.launch()
 
 
 
 
 
6
  import datasets
7
  from langchain.docstore.document import Document
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+
10
+ from langchain_community.document_loaders import HuggingFaceDatasetLoader
11
  import chromadb
12
  from chromadb.utils import embedding_functions
13
 
 
92
 
93
 
94
  if __name__ == "__main__":
95
+ # knowledge_base = datasets.load_dataset("MuskumPillerum/General-Knowledge", split="train")
96
+ # print(knowledge_base.column_names)
97
+ # source_docs = [
98
+ # Document(
99
+ # page_content=doc["Answer"], metadata={"question": doc["Question"]}
100
+ # )
101
+ # for doc in knowledge_base
102
+ # ]
103
+ source_docs = HuggingFaceDatasetLoader("MuskumPillerum/General-Knowledge", "Answer").load()
104
  text_splitter = RecursiveCharacterTextSplitter(
105
  chunk_size=500,
106
  chunk_overlap=50,
 
109
  separators=["\n\n", "\n", ".", " ", ""],
110
  )
111
  docs_processed = text_splitter.split_documents(source_docs)
112
+ print(docs_processed)
113
+ retriever_tool = RetrieverTool(docs_processed)
114
+ model = HfApiModel(
115
+ # model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
116
+ model_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
117
+ token=os.getenv("agentic_rag_inference_token"),
118
+ )
119
+
120
+ agent = CodeAgent(
121
+ tools=[retriever_tool],
122
+ model=model,
123
+ max_steps=10,
124
+ verbosity_level=10,
125
+ )
126
+
127
+
128
+ def enter_message(new_message, conversation_history):
129
+ conversation_history.append(gr.ChatMessage(role="user", content=new_message))
130
+ yield "", conversation_history
131
+ for msg in stream_to_gradio(agent, new_message):
132
+ conversation_history.append(msg)
133
+ yield "", conversation_history
134
+
135
+
136
+ def clear_message(chat_history: list):
137
+ return chat_history.clear()
138
+
139
+
140
+ with gr.Blocks() as b:
141
+ chatbot = gr.Chatbot(type="messages", height=1000)
142
+ textbox = gr.Textbox(lines=3, label="")
143
+ enter_button = gr.Button("enter")
144
+ stop_generating_button = gr.Button("stop generating")
145
+ clear_messages_button = gr.Button("clear messages")
146
+ reply_button_click_event = enter_button.click(enter_message, [textbox, chatbot], [textbox, chatbot])
147
+ textbox.submit(enter_message, [textbox, chatbot], [textbox, chatbot])
148
+ clear_messages_button.click(fn=clear_message, inputs=chatbot, outputs=[chatbot, textbox],
149
+ cancels=[reply_button_click_event])
150
+ stop_generating_button.click(cancels=[reply_button_click_event])
151
+
152
+ b.launch()