CyranoB committed
Commit bda01ad · 1 Parent(s): 6303c28

Better chat ui

Files changed (4):
  1. README.md +3 -2
  2. search_agent.py +1 -1
  3. search_agent_ui.py +8 -17
  4. web_crawler.py +0 -1
README.md CHANGED
@@ -15,8 +15,9 @@ license: apache-2.0
  This Python project provides a search agent that can perform web searches, optimize search queries, fetch and process web content, and generate responses using a language model and the retrieved information.
  Does a bit what [Perplexity AI](https://www.perplexity.ai/) does.
 
+ The Streamlit GUI hosted on 🤗 Spaces is [available to test](https://huggingface.co/spaces/CyranoB/search_agent).
 
- This Python script is a search agent that utilizes the LangChain library to perform optimized web searches, retrieve relevant content, and generate informative answers to user queries. The script supports multiple language models and providers, including OpenAI, Anthropic, and Groq.
+ This Python script and Streamlit GUI are a basic search agent that utilizes the LangChain library to perform optimized web searches, retrieve relevant content, and generate informative answers to user queries. The script supports multiple language models and providers, including OpenAI, Anthropic, and Groq.
 
  The main functionality of the script can be summarized as follows:
@@ -34,7 +35,7 @@ To run the script, users need to provide their API keys for the desired language
 
  ## Features
 
- - Supports multiple language model providers (Bedrock, OpenAI, Groq, and Ollama)
+ - Supports multiple language model providers (Bedrock, OpenAI, Groq, Cohere, and Ollama)
  - Optimizes search queries using a language model
  - Fetches web pages and extracts main content (HTML and PDF)
  - Vectorizes the content for efficient retrieval
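
Taken together, the README's feature list describes a four-step pipeline: optimize the query, search the web, fetch and vectorize the content, then answer over the retrieved chunks. Below is a minimal sketch of that flow, assuming the `wr`/`wc` helper modules and the function names visible in the diffs below; the `web_rag` module name, the defaults, and the final `invoke()` call are assumptions, not the project's verbatim code.

```python
# Minimal sketch of the pipeline the README describes; names marked below
# as assumed are illustrative, not the project's actual code.
import web_rag as wr          # assumed name of the module imported as `wr`
import web_crawler as wc      # matches web_crawler.py in this commit

def answer(query: str, provider: str = "openai", model: str | None = None, temperature: float = 0.0):
    chat = wr.get_chat_llm(provider, model, temperature)   # pick the LLM backend
    optimized = wr.optimize_search_query(chat, query)      # rewrite the query for search
    sources = wc.get_sources(optimized, max_pages=20)      # run the web search
    contents = wc.get_links_contents(sources)              # fetch HTML/PDF content
    vector_store = wc.vectorize(contents)                  # chunk + embed into FAISS
    rag_prompt = wr.build_rag_prompt(query, optimized, vector_store, top_k=5)
    return chat.invoke(rag_prompt)                         # answer grounded in the sources
```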
search_agent.py CHANGED
@@ -79,7 +79,7 @@ if __name__ == '__main__':
      query = arguments["SEARCH_QUERY"]
 
      chat = wr.get_chat_llm(provider, model, temperature)
-     #console.log(f"Using {model} on {provider} with temperature {temperature}")
+     console.log(f"Using {chat.model} on {provider} with temperature {temperature}")
 
      with console.status(f"[bold green]Optimizing query for search: {query}"):
          optimize_search_query = wr.optimize_search_query(chat, query, callbacks=callbacks)
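
The change swaps a commented-out log that echoed the raw `model` argument for a live one that reads `chat.model` off the constructed object, so the resolved model name is printed even when the CLI argument was left empty. A hypothetical sketch of why that matters, with made-up provider defaults; nothing here is the project's actual resolution logic.

```python
from dataclasses import dataclass

# Assumed defaults for illustration only; the real get_chat_llm may differ.
ASSUMED_DEFAULTS = {"openai": "gpt-4-turbo-preview", "groq": "mixtral-8x7b-32768"}

@dataclass
class Chat:
    model: str
    temperature: float

def get_chat_llm(provider: str, model: str | None, temperature: float) -> Chat:
    # The model is resolved here, so logging the raw argument could print None;
    # reading chat.model back always shows the name actually in use.
    return Chat(model or ASSUMED_DEFAULTS.get(provider, "unknown"), temperature)

chat = get_chat_llm("openai", None, 0.7)
print(f"Using {chat.model} on openai with temperature {chat.temperature}")  # resolved name
```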
search_agent_ui.py CHANGED
@@ -53,29 +53,20 @@ if prompt := st.chat_input("Enter you instructions...", disabled=st.session_stat
      st.chat_message("user").write(prompt)
      st.session_state.messages.append({"role": "user", "content": prompt})
 
-     message = "I first need to do some research"
-     st.chat_message("assistant").write(message)
-     st.session_state.messages.append({"role": "assistant", "content": message})
-
-     with st.spinner("Optimizing search query"):
+     with st.status("Thinking", expanded=True):
+         st.write("I first need to do some research")
+
          optimize_search_query = wr.optimize_search_query(chat, query=prompt, callbacks=[ls_tracer])
-
-     message = f"I'll search the web for: {optimize_search_query}"
-     st.chat_message("assistant").write(message)
-     st.session_state.messages.append({"role": "assistant", "content": message})
-
-     with st.spinner(f"Searching the web for: {optimize_search_query}"):
+         st.write(f"I should search the web for: {optimize_search_query}")
+
          sources = wc.get_sources(optimize_search_query, max_pages=20)
 
-     with st.spinner(f"I'm now retrieveing the {len(sources)} webpages and documents I found (be patient)"):
+         st.write(f"I'll now retrieve the {len(sources)} webpages and documents I found")
          contents = wc.get_links_contents(sources)
 
-     with st.spinner(f"Reading through the {len(contents)} sources I managed to retrieve"):
+         st.write(f"Reading through the {len(contents)} sources I managed to retrieve")
          vector_store = wc.vectorize(contents)
-
-     message = f"Got {vector_store.index.ntotal} chunk of data"
-     st.chat_message("assistant").write(message)
-     st.session_state.messages.append({"role": "assistant", "content": message})
+         st.write(f"I collected {vector_store.index.ntotal} chunks of data and I can now answer")
 
      rag_prompt = wr.build_rag_prompt(prompt, optimize_search_query, vector_store, top_k=5, callbacks=[ls_tracer])
      with st.chat_message("assistant"):
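
This is the "better chat ui" of the commit title: four `st.spinner` blocks and three interleaved assistant messages collapse into one `st.status` container that streams progress lines while the agent works. A standalone sketch of that Streamlit pattern; the step names and `time.sleep` calls are placeholders for the real work, not the app's code.

```python
import time
import streamlit as st

if prompt := st.chat_input("Enter your instructions..."):
    st.chat_message("user").write(prompt)
    # One expandable status container replaces per-step spinners and messages.
    with st.status("Thinking", expanded=True) as status:
        for step in ("Optimizing the query", "Searching the web", "Reading the sources"):
            st.write(step)
            time.sleep(1)  # stand-in for the real work at each step
        status.update(label="Research done", state="complete", expanded=False)
    st.chat_message("assistant").write("Final answer goes here.")
```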
web_crawler.py CHANGED
@@ -137,7 +137,6 @@ def vectorize(contents):
          print(f"[gray]Error processing content for {content['link']}: {e}")
      semantic_chunker = SemanticChunker(OpenAIEmbeddings(model="text-embedding-3-large"), breakpoint_threshold_type="percentile")
      docs = semantic_chunker.split_documents(documents)
-     print(f"Vectorizing {len(docs)} document chunks")
      embeddings = OpenAIEmbeddings()
      store = FAISS.from_documents(docs, embeddings)
      return store
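
`vectorize` splits the fetched pages with `SemanticChunker` and embeds them into a FAISS store; the deleted `print` drops a console-only progress line now that the Streamlit UI reports the chunk count itself. A short usage sketch for the returned store: the helper name is hypothetical, `index.ntotal` is the counter the UI displays, and `similarity_search` is the standard LangChain vector-store call, an assumption about how this store is queried.

```python
from langchain_community.vectorstores import FAISS

def top_chunks(store: FAISS, query: str, k: int = 5):
    # index.ntotal is the same chunk counter the Streamlit UI now reports.
    print(f"Store holds {store.index.ntotal} chunks")
    # k=5 mirrors the top_k=5 passed to build_rag_prompt in search_agent_ui.py.
    return store.similarity_search(query, k=k)
```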