Better chat ui
Files changed:
- README.md (+3 -2)
- search_agent.py (+1 -1)
- search_agent_ui.py (+8 -17)
- web_crawler.py (+0 -1)
README.md
CHANGED
@@ -15,8 +15,9 @@ license: apache-2.0
 This Python project provides a search agent that can perform web searches, optimize search queries, fetch and process web content, and generate responses using a language model and the retrieved information.
 Does a bit of what [Perplexity AI](https://www.perplexity.ai/) does.
 
+The Streamlit GUI hosted on 🤗 Spaces is [available to test](https://huggingface.co/spaces/CyranoB/search_agent).
 
-This Python script
+This Python script and Streamlit GUI are a basic search agent that utilizes the LangChain library to perform optimized web searches, retrieve relevant content, and generate informative answers to user queries. The script supports multiple language models and providers, including OpenAI, Anthropic, and Groq.
 
 The main functionality of the script can be summarized as follows:
 
@@ -34,7 +35,7 @@ To run the script, users need to provide their API keys for the desired language
 
 ## Features
 
-- Supports multiple language model providers (Bedrock, OpenAI, Groq, and Ollama)
+- Supports multiple language model providers (Bedrock, OpenAI, Groq, Cohere, and Ollama)
 - Optimizes search queries using a language model
 - Fetches web pages and extracts main content (HTML and PDF)
 - Vectorizes the content for efficient retrieval
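Taken together, the README describes a four-stage pipeline: optimize the query, search and fetch sources, vectorize them, then answer from the retrieved chunks. Below is a minimal sketch of how the pieces visible in this commit fit together; the module names `web_rag`/`web_crawler` and the final `invoke()` call are assumptions, only the `wr.*`/`wc.*` signatures appear in the diffs.

```python
# Sketch of the agent pipeline the README describes. Module names and the
# final invoke() call are assumptions; the wr.*/wc.* call signatures are the
# ones visible in the diffs in this commit.
import web_rag as wr       # assumed module name behind the `wr` alias
import web_crawler as wc   # matches the web_crawler.py file in this commit

def answer(question: str, provider: str = "openai",
           model: str = "gpt-4", temperature: float = 0.0) -> str:
    chat = wr.get_chat_llm(provider, model, temperature)
    # 1. Rewrite the user's question into a sharper web-search query.
    search_query = wr.optimize_search_query(chat, question)
    # 2. Search, then fetch and parse the result pages (HTML and PDF).
    sources = wc.get_sources(search_query, max_pages=20)
    contents = wc.get_links_contents(sources)
    # 3. Chunk and embed the fetched content into a vector store.
    vector_store = wc.vectorize(contents)
    # 4. Answer from the top-k retrieved chunks.
    rag_prompt = wr.build_rag_prompt(question, search_query, vector_store, top_k=5)
    return chat.invoke(rag_prompt).content
```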
search_agent.py
CHANGED
@@ -79,7 +79,7 @@ if __name__ == '__main__':
     query = arguments["SEARCH_QUERY"]
 
     chat = wr.get_chat_llm(provider, model, temperature)
-
+    console.log(f"Using {chat.model} on {provider} with temperature {temperature}")
 
     with console.status(f"[bold green]Optimizing query for search: {query}"):
         optimize_search_query = wr.optimize_search_query(chat, query, callbacks=callbacks)
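The new `console.log` line reads `chat.model`, which implies `get_chat_llm` returns a LangChain chat model exposing a model attribute. The function body is not part of this diff; what follows is only a plausible sketch of the provider dispatch using standard LangChain chat classes, with the exact class set, parameter names, and defaults as assumptions.

```python
# Hypothetical reconstruction of get_chat_llm; not shown in this commit.
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama

def get_chat_llm(provider: str, model: str, temperature: float = 0.0):
    """Return a LangChain chat model for the requested provider (sketch)."""
    if provider == "openai":
        return ChatOpenAI(model=model, temperature=temperature)
    if provider == "groq":
        return ChatGroq(model=model, temperature=temperature)
    if provider == "ollama":
        return ChatOllama(model=model, temperature=temperature)
    raise ValueError(f"Unsupported provider: {provider}")
```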
search_agent_ui.py
CHANGED
@@ -53,29 +53,20 @@ if prompt := st.chat_input("Enter your instructions...", disabled=st.session_stat
     st.chat_message("user").write(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-
-
-
-
-    with st.spinner("Optimizing search query"):
+    with st.status("Thinking", expanded=True):
+        st.write("I first need to do some research")
+
         optimize_search_query = wr.optimize_search_query(chat, query=prompt, callbacks=[ls_tracer])
-
-
-    st.chat_message("assistant").write(message)
-    st.session_state.messages.append({"role": "assistant", "content": message})
-
-    with st.spinner(f"Searching the web for: {optimize_search_query}"):
+        st.write(f"I should search the web for: {optimize_search_query}")
+
         sources = wc.get_sources(optimize_search_query, max_pages=20)
 
-
+        st.write(f"I'll now retrieve the {len(sources)} webpages and documents I found")
         contents = wc.get_links_contents(sources)
 
-
+        st.write(f"Reading through the {len(contents)} sources I managed to retrieve")
         vector_store = wc.vectorize(contents)
-
-    message = f"Got {vector_store.index.ntotal} chunk of data"
-    st.chat_message("assistant").write(message)
-    st.session_state.messages.append({"role": "assistant", "content": message})
+        st.write(f"I collected {vector_store.index.ntotal} chunks of data and I can now answer")
 
     rag_prompt = wr.build_rag_prompt(prompt, optimize_search_query, vector_store, top_k=5, callbacks=[ls_tracer])
     with st.chat_message("assistant"):
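This change swaps the per-step `st.spinner` blocks and intermediate assistant messages for a single `st.status` container, which keeps all progress updates inside one collapsible "Thinking" box instead of cluttering the chat history. A minimal, self-contained illustration of the pattern (the step labels and `time.sleep` stand-ins are placeholders, not the app's real work):

```python
import time
import streamlit as st

with st.status("Thinking", expanded=True) as status:
    st.write("Optimizing the search query")  # placeholder step
    time.sleep(1)                            # stands in for real work
    st.write("Searching the web")            # placeholder step
    time.sleep(1)
    # Collapse the box and mark it done once all steps finish.
    status.update(label="Done", state="complete", expanded=False)

st.chat_message("assistant").write("Here is the answer.")
```

A design note: `st.status` keeps the chat transcript clean because the progress lines live in the status widget, whereas the old code appended each progress message to `st.session_state.messages` permanently.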
web_crawler.py
CHANGED
@@ -137,7 +137,6 @@ def vectorize(contents):
             print(f"[gray]Error processing content for {content['link']}: {e}")
     semantic_chunker = SemanticChunker(OpenAIEmbeddings(model="text-embedding-3-large"), breakpoint_threshold_type="percentile")
     docs = semantic_chunker.split_documents(documents)
-    print(f"Vectorizing {len(docs)} document chunks")
     embeddings = OpenAIEmbeddings()
     store = FAISS.from_documents(docs, embeddings)
     return store
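For context, the names in this hunk come from the LangChain ecosystem: `SemanticChunker` splits text at semantic breakpoints rather than fixed character counts, and FAISS holds the resulting embeddings for retrieval. A self-contained sketch of a `vectorize`-style function under those imports; the document-building loop is simplified relative to the real file, and the `page_content`/`link` keys are assumptions inferred from the error message above.

```python
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

def vectorize(contents):
    """Split fetched page text semantically and index it in FAISS (sketch)."""
    documents = []
    for content in contents:
        try:
            # Simplified: the real file builds Documents from fetched pages.
            documents.append(Document(page_content=content["page_content"],
                                      metadata={"source": content["link"]}))
        except Exception as e:
            print(f"[gray]Error processing content for {content['link']}: {e}")
    # Split where embedding similarity drops below a percentile threshold.
    semantic_chunker = SemanticChunker(
        OpenAIEmbeddings(model="text-embedding-3-large"),
        breakpoint_threshold_type="percentile")
    docs = semantic_chunker.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(docs, embeddings)
```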