Better chat ui
Files changed:
- README.md (+3 -2)
- search_agent.py (+1 -1)
- search_agent_ui.py (+8 -17)
- web_crawler.py (+0 -1)
README.md
CHANGED
@@ -15,8 +15,9 @@ license: apache-2.0
 This Python project provides a search agent that can perform web searches, optimize search queries, fetch and process web content, and generate responses using a language model and the retrieved information.
 Does a bit of what [Perplexity AI](https://www.perplexity.ai/) does.
 
+The Streamlit GUI hosted on 🤗 Spaces is [available to test](https://huggingface.co/spaces/CyranoB/search_agent).
 
-This Python script
+This Python script and Streamlit GUI are a basic search agent that utilizes the LangChain library to perform optimized web searches, retrieve relevant content, and generate informative answers to user queries. The script supports multiple language models and providers, including OpenAI, Anthropic, and Groq.
 
 The main functionality of the script can be summarized as follows:
 
@@ -34,7 +35,7 @@ To run the script, users need to provide their API keys for the desired language
 
 ## Features
 
-- Supports multiple language model providers (Bedrock, OpenAI, Groq, and Ollama)
+- Supports multiple language model providers (Bedrock, OpenAI, Groq, Cohere, and Ollama)
 - Optimizes search queries using a language model
 - Fetches web pages and extracts main content (HTML and PDF)
 - Vectorizes the content for efficient retrieval
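Taken together, the README describes a four-stage pipeline: optimize the query, search and fetch sources, vectorize them, then answer from the retrieved chunks. Below is a minimal sketch of how the pieces visible in this commit fit together; the module names `web_rag`/`web_crawler` and the final `invoke()` call are assumptions, only the `wr.*`/`wc.*` signatures appear in the diffs.

```python
# Sketch of the agent pipeline the README describes. Module names and the
# final invoke() call are assumptions; the wr.*/wc.* call signatures are the
# ones visible in the diffs in this commit.
import web_rag as wr       # assumed module name behind the `wr` alias
import web_crawler as wc   # matches the web_crawler.py file in this commit

def answer(question: str, provider: str = "openai",
           model: str = "gpt-4", temperature: float = 0.0) -> str:
    chat = wr.get_chat_llm(provider, model, temperature)
    # 1. Rewrite the user's question into a sharper web-search query.
    search_query = wr.optimize_search_query(chat, question)
    # 2. Search, then fetch and parse the result pages (HTML and PDF).
    sources = wc.get_sources(search_query, max_pages=20)
    contents = wc.get_links_contents(sources)
    # 3. Chunk and embed the fetched content into a vector store.
    vector_store = wc.vectorize(contents)
    # 4. Answer from the top-k retrieved chunks.
    rag_prompt = wr.build_rag_prompt(question, search_query, vector_store, top_k=5)
    return chat.invoke(rag_prompt).content
```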
search_agent.py
CHANGED
@@ -79,7 +79,7 @@ if __name__ == '__main__':
     query = arguments["SEARCH_QUERY"]
 
     chat = wr.get_chat_llm(provider, model, temperature)
-
+    console.log(f"Using {chat.model} on {provider} with temperature {temperature}")
 
     with console.status(f"[bold green]Optimizing query for search: {query}"):
         optimize_search_query = wr.optimize_search_query(chat, query, callbacks=callbacks)
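The new `console.log` line reads `chat.model`, which implies `get_chat_llm` returns a LangChain chat model exposing a model attribute. The function body is not part of this diff; what follows is only a plausible sketch of the provider dispatch using standard LangChain chat classes, with the exact class set, parameter names, and defaults as assumptions.

```python
# Hypothetical reconstruction of get_chat_llm; not shown in this commit.
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama

def get_chat_llm(provider: str, model: str, temperature: float = 0.0):
    """Return a LangChain chat model for the requested provider (sketch)."""
    if provider == "openai":
        return ChatOpenAI(model=model, temperature=temperature)
    if provider == "groq":
        return ChatGroq(model=model, temperature=temperature)
    if provider == "ollama":
        return ChatOllama(model=model, temperature=temperature)
    raise ValueError(f"Unsupported provider: {provider}")
```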
search_agent_ui.py
CHANGED
@@ -53,29 +53,20 @@ if prompt := st.chat_input("Enter your instructions...", disabled=st.session_stat
     st.chat_message("user").write(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-
-
-
-
-    with st.spinner("Optimizing search query"):
+    with st.status("Thinking", expanded=True):
+        st.write("I first need to do some research")
+
         optimize_search_query = wr.optimize_search_query(chat, query=prompt, callbacks=[ls_tracer])
-
-
-    st.chat_message("assistant").write(message)
-    st.session_state.messages.append({"role": "assistant", "content": message})
-
-    with st.spinner(f"Searching the web for: {optimize_search_query}"):
+        st.write(f"I should search the web for: {optimize_search_query}")
+
         sources = wc.get_sources(optimize_search_query, max_pages=20)
 
-
+        st.write(f"I'll now retrieve the {len(sources)} webpages and documents I found")
         contents = wc.get_links_contents(sources)
 
-
+        st.write(f"Reading through the {len(contents)} sources I managed to retrieve")
         vector_store = wc.vectorize(contents)
-
-    message = f"Got {vector_store.index.ntotal} chunk of data"
-    st.chat_message("assistant").write(message)
-    st.session_state.messages.append({"role": "assistant", "content": message})
+        st.write(f"I collected {vector_store.index.ntotal} chunks of data and I can now answer")
 
     rag_prompt = wr.build_rag_prompt(prompt, optimize_search_query, vector_store, top_k=5, callbacks=[ls_tracer])
     with st.chat_message("assistant"):
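This change swaps the per-step `st.spinner` blocks and intermediate assistant messages for a single `st.status` container, which keeps all progress updates inside one collapsible "Thinking" box instead of cluttering the chat history. A minimal, self-contained illustration of the pattern (the step labels and `time.sleep` stand-ins are placeholders, not the app's real work):

```python
import time
import streamlit as st

with st.status("Thinking", expanded=True) as status:
    st.write("Optimizing the search query")  # placeholder step
    time.sleep(1)                            # stands in for real work
    st.write("Searching the web")            # placeholder step
    time.sleep(1)
    # Collapse the box and mark it done once all steps finish.
    status.update(label="Done", state="complete", expanded=False)

st.chat_message("assistant").write("Here is the answer.")
```

A design note: `st.status` keeps the chat transcript clean because the progress lines live in the status widget, whereas the old code appended each progress message to `st.session_state.messages` permanently.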
web_crawler.py
CHANGED
@@ -137,7 +137,6 @@ def vectorize(contents):
             print(f"[gray]Error processing content for {content['link']}: {e}")
     semantic_chunker = SemanticChunker(OpenAIEmbeddings(model="text-embedding-3-large"), breakpoint_threshold_type="percentile")
     docs = semantic_chunker.split_documents(documents)
-    print(f"Vectorizing {len(docs)} document chunks")
     embeddings = OpenAIEmbeddings()
     store = FAISS.from_documents(docs, embeddings)
     return store
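For context, the names in this hunk come from the LangChain ecosystem: `SemanticChunker` splits text at semantic breakpoints rather than fixed character counts, and FAISS holds the resulting embeddings for retrieval. A self-contained sketch of a `vectorize`-style function under those imports; the document-building loop is simplified relative to the real file, and the `page_content`/`link` keys are assumptions inferred from the error message above.

```python
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

def vectorize(contents):
    """Split fetched page text semantically and index it in FAISS (sketch)."""
    documents = []
    for content in contents:
        try:
            # Simplified: the real file builds Documents from fetched pages.
            documents.append(Document(page_content=content["page_content"],
                                      metadata={"source": content["link"]}))
        except Exception as e:
            print(f"[gray]Error processing content for {content['link']}: {e}")
    # Split where embedding similarity drops below a percentile threshold.
    semantic_chunker = SemanticChunker(
        OpenAIEmbeddings(model="text-embedding-3-large"),
        breakpoint_threshold_type="percentile")
    docs = semantic_chunker.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    return FAISS.from_documents(docs, embeddings)
```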