ofermend commited on
Commit
ce90730
·
1 Parent(s): 4c78198
Files changed (5) hide show
  1. README.md +3 -3
  2. agent.py +43 -112
  3. app.py +2 -2
  4. requirements.txt +1 -1
  5. st_app.py +66 -8
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Clinical Trials Assistant
3
- emoji: 👨‍⚕️
4
  colorFrom: indigo
5
  colorTo: indigo
6
  sdk: docker
7
  app_port: 8501
8
  pinned: false
9
  license: apache-2.0
10
- short_description: Clinical Trial assistant using vectara-agentic
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Cona Assistant
3
+ emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: indigo
6
  sdk: docker
7
  app_port: 8501
8
  pinned: false
9
  license: apache-2.0
10
+ short_description: Ask questions about Cona Services
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agent.py CHANGED
@@ -1,137 +1,68 @@
1
- import pandas as pd
2
- import requests
 
3
  from pydantic import Field, BaseModel
4
-
5
  from omegaconf import OmegaConf
 
6
 
7
  from vectara_agentic.agent import Agent
8
- from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
9
 
10
- initial_prompt = "How can I help you today?"
 
11
 
12
- prompt = """
13
- [
14
- {"role": "system", "content": "You are an AI assistant that forms a coherent answer to a user query based on search results that are provided to you." },
15
- {"role": "user", "content": "
16
- [INSTRUCTIONS]
17
- If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
18
- Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
19
- Do not base your response on information or knowledge that is not in the search results.
20
- Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
21
- Consider that each search result is a partial segment from a bigger text, and may be incomplete.
22
- Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
23
- Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in markdown format.
24
- #foreach ($qResult in $vectaraQueryResultsDeduped)
25
- [$esc.java($foreach.index + 1)]
26
- #if($qResult.hasTable())
27
- Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
28
- $qResult.getTable().markdown()
29
- #else
30
- $qResult.getText()
31
- #end
32
- #end
33
- Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** using information and facts in the search results provided.
34
- Give a slight preference to search results that appear earlier in the list.
35
- Include statistical and numerical evidence to support and contextualize your response.
36
- Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
37
- If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
38
- ]
39
- """
40
 
41
  def create_assistant_tools(cfg):
42
 
 
 
43
 
44
- class QueryPublicationsArgs(BaseModel):
45
- query: str = Field(..., description="The user query, always in the form of a question",
46
- examples=["what are the risks reported?", "which drug was use on the and how big was the population?"])
47
-
48
- vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
49
- vectara_corpus_key=cfg.corpus_key)
50
  summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'
51
- ask_publications = vec_factory.create_rag_tool(
52
- tool_name = "ask_publications",
53
  tool_description = """
54
- Responds to an user question about a particular result, based on the publications.
 
55
  """,
56
- tool_args_schema = QueryPublicationsArgs,
57
- # reranker = "multilingual_reranker_v1", rerank_k = 100,
58
- reranker = "chain", rerank_k = 100,
59
- rerank_chain = [
60
- {
61
- "type": "multilingual_reranker_v1",
62
- # "cutoff": 0.2
63
- },
64
- {
65
- "type": "mmr",
66
- "diversity_bias": 0.2,
67
- "limit": 50
68
- }
69
- ],
70
- n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
71
- summary_num_results = 15,
72
  vectara_summarizer = summarizer,
 
73
  include_citations = True,
74
- vectara_prompt_text=prompt,
75
- save_history = True,
76
- verbose=False
77
- )
78
-
79
- search_publications = vec_factory.create_search_tool(
80
- tool_name = "search_publications",
81
- tool_description = """
82
- Returns matching publications to a user query.
83
- """,
84
- tool_args_schema = QueryPublicationsArgs,
85
- reranker = "chain", rerank_k = 100,
86
- rerank_chain = [
87
- {
88
- "type": "multilingual_reranker_v1",
89
- # "cutoff": 0.2
90
- },
91
- {
92
- "type": "mmr",
93
- "diversity_bias": 0.2,
94
- "limit": 50
95
- }
96
- ],
97
- # reranker = "multilingual_reranker_v1", rerank_k = 100,
98
- n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
99
- save_history = True,
100
- verbose=True
101
- )
102
-
103
-
104
- tools_factory = ToolsFactory()
105
- return (
106
- tools_factory.standard_tools() +
107
- [ask_publications, search_publications]
108
  )
 
109
 
110
  def initialize_agent(_cfg, agent_progress_callback=None):
111
- menarini_bot_instructions = """
112
- - You are an expert statistician and clinical trial data analyst with extensive experience in designing, analyzing, and interpreting clinical research data.
113
- - Your responses should be technically rigorous, data-driven, and written for an audience familiar with advanced statistical methodologies, regulatory standards, and the nuances of clinical trial design.
114
- - Call the ask_publications tool to retreive information to answer the user query.
115
- If the initial query lacks comprehensive data, continue to query ask_publications with refined search parameters until you retrieve all necessary numerical details
116
- - Call the search_publications tool to retreive a list of publications that may contain the information needed to answer the user query.
117
- The results include the document_id of each publication, and metadata.
118
- - When responding to queries:
119
- 1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
120
- and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
121
- 2) Your responses must include contextual information such as sample size and population characteristics. This nuance is crucial in clinical trial analysis.
122
- When considering or reporting sample sizes, consider participants who were eligible for the study, those who were randomized, and those who completed the study.
123
- If it's unclear which one is being referred to, clarify this in your response or ask the user for clarification.
124
- 3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
125
- 4) Ensure that your analysis is evidence-based and reflects current best practices in the field of clinical research and data analysis.
126
- 5) Before finalizing your answer, review the analysis to ensure that all relevant data has been incorporated and that your conclusions are well-supported by the evidence.
127
- 6) Provide sources and citations for all data and statistical information included in your responses, as provided in the response from the tools.
128
  """
129
 
130
  agent = Agent(
131
  tools=create_assistant_tools(_cfg),
132
- topic="Drug trials publications",
133
- custom_instructions=menarini_bot_instructions,
134
  agent_progress_callback=agent_progress_callback,
135
  )
136
  agent.report()
137
- return agent
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+ import json
4
  from pydantic import Field, BaseModel
 
5
  from omegaconf import OmegaConf
6
+ import requests
7
 
8
  from vectara_agentic.agent import Agent
9
+ from vectara_agentic.tools import VectaraToolFactory, ToolsFactory
10
 
11
+ from dotenv import load_dotenv
12
+ load_dotenv(override=True)
13
 
14
+ initial_prompt = "How can I help you today?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def create_assistant_tools(cfg):
17
 
18
+ class QueryCona(BaseModel):
19
+ query: str = Field(description="The user query.")
20
 
21
+ vec_factory = VectaraToolFactory(
22
+ vectara_api_key=cfg.api_key,
23
+ vectara_corpus_key=cfg.corpus_key
24
+ )
25
+
 
26
  summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'
27
+ ask_ti = vec_factory.create_rag_tool(
28
+ tool_name = "ask_cona",
29
  tool_description = """
30
+ Given a user query,
31
+ returns a response to a user question about bottling companies.
32
  """,
33
+ tool_args_schema = QueryCona,
34
+ reranker = "slingshot", rerank_k = 100,
35
+ n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.01,
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  vectara_summarizer = summarizer,
37
+ summary_num_results = 20,
38
  include_citations = True,
39
+ verbose = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  )
41
+ return [ask_ti] + ToolsFactory().guardrail_tools()
42
 
43
  def initialize_agent(_cfg, agent_progress_callback=None):
44
+ bot_instructions = """
45
+ - You are a helpful assistant, with expertise in products from coca cola and other bottling companies.
46
+ - Use the ask_cona tool to answer most questions about any products related to coca cola.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  """
48
 
49
  agent = Agent(
50
  tools=create_assistant_tools(_cfg),
51
+ topic="Cona services and coca cola",
52
+ custom_instructions=bot_instructions,
53
  agent_progress_callback=agent_progress_callback,
54
  )
55
  agent.report()
56
+ return agent
57
+
58
+
59
+ def get_agent_config() -> OmegaConf:
60
+ cfg = OmegaConf.create({
61
+ 'corpus_key': str(os.environ['VECTARA_CORPUS_KEY']),
62
+ 'api_key': str(os.environ['VECTARA_API_KEY']),
63
+ 'examples': os.environ.get('QUERY_EXAMPLES', None),
64
+ 'demo_name': "Cona Demo",
65
+ 'demo_welcome': "Cona Assistant.",
66
+ 'demo_description': "This assistant can help you with any questions about Cona Services."
67
+ })
68
+ return cfg
app.py CHANGED
@@ -13,6 +13,6 @@ if "feedback_key" not in st.session_state:
13
  st.session_state.feedback_key = 0
14
 
15
  if __name__ == "__main__":
16
- st.set_page_config(page_title="Menarini Assistant", layout="wide")
17
  nest_asyncio.apply()
18
- asyncio.run(launch_bot())
 
13
  st.session_state.feedback_key = 0
14
 
15
  if __name__ == "__main__":
16
+ st.set_page_config(page_title="Cona Assistant", layout="wide")
17
  nest_asyncio.apply()
18
+ asyncio.run(launch_bot())
requirements.txt CHANGED
@@ -6,4 +6,4 @@ streamlit_feedback==0.1.3
6
  uuid==1.30
7
  langdetect==1.0.9
8
  langcodes==3.4.0
9
- vectara-agentic==0.2.1
 
6
  uuid==1.30
7
  langdetect==1.0.9
8
  langcodes==3.4.0
9
+ vectara-agentic==0.2.1
st_app.py CHANGED
@@ -3,13 +3,24 @@ import sys
3
  import re
4
 
5
  import streamlit as st
 
 
 
 
6
 
7
  from vectara_agentic.agent import AgentStatusType
8
- from agent import initialize_agent
9
- from config import get_agent_config
10
 
11
  initial_prompt = "How can I help you today?"
12
 
 
 
 
 
 
 
 
 
13
 
14
  def format_log_msg(log_msg: str):
15
  max_log_msg_size = 500
@@ -57,19 +68,50 @@ async def launch_bot():
57
  cfg = get_agent_config()
58
  st.session_state.cfg = cfg
59
  st.session_state.ex_prompt = None
 
 
60
  reset()
61
 
62
  cfg = st.session_state.cfg
63
- print(f'Configuration: {cfg}')
64
 
65
  # left side content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Display chat messages
68
  for message in st.session_state.messages:
69
- print(f'Message: {message}')
70
  with st.chat_message(message["role"], avatar=message["avatar"]):
71
  st.write(message["content"])
72
 
 
 
 
 
 
 
73
 
74
  # User-provided prompt
75
  if st.session_state.ex_prompt:
@@ -90,17 +132,33 @@ async def launch_bot():
90
  if st.session_state.prompt:
91
  with st.chat_message("assistant", avatar='🤖'):
92
  st.session_state.status = st.status('Processing...', expanded=False)
93
- res = st.session_state.agent.chat(st.session_state.prompt)
94
- #res = escape_dollars_outside_latex(res)
95
- res = str(res)
96
  message = {"role": "assistant", "content": res, "avatar": '🤖'}
97
  st.session_state.messages.append(message)
98
  st.markdown(res)
99
 
 
 
 
 
 
100
 
101
  st.session_state.ex_prompt = None
102
  st.session_state.prompt = None
103
  st.session_state.first_turn = False
104
  st.rerun()
105
 
106
- sys.stdout.flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import re
4
 
5
  import streamlit as st
6
+ from streamlit_pills import pills
7
+ from streamlit_feedback import streamlit_feedback
8
+
9
+ from utils import thumbs_feedback, escape_dollars_outside_latex, send_amplitude_data
10
 
11
  from vectara_agentic.agent import AgentStatusType
12
+ from agent import initialize_agent, get_agent_config
 
13
 
14
  initial_prompt = "How can I help you today?"
15
 
16
+ def show_example_questions():
17
+ if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
18
+ selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
19
+ if selected_example:
20
+ st.session_state.ex_prompt = selected_example
21
+ st.session_state.first_turn = False
22
+ return True
23
+ return False
24
 
25
  def format_log_msg(log_msg: str):
26
  max_log_msg_size = 500
 
68
  cfg = get_agent_config()
69
  st.session_state.cfg = cfg
70
  st.session_state.ex_prompt = None
71
+ example_messages = [example.strip() for example in cfg.examples.split(";")] if cfg.examples else []
72
+ st.session_state.example_messages = [em for em in example_messages if len(em)>0]
73
  reset()
74
 
75
  cfg = st.session_state.cfg
 
76
 
77
  # left side content
78
+ with st.sidebar:
79
+ image = Image.open('Vectara-logo.png')
80
+ st.image(image, width=175)
81
+ st.markdown(f"## {cfg['demo_welcome']}")
82
+ st.markdown(f"{cfg['demo_description']}")
83
+
84
+ st.markdown("\n\n")
85
+ bc1, bc2 = st.columns([1, 1])
86
+ with bc1:
87
+ if st.button('Start Over'):
88
+ reset()
89
+ st.rerun()
90
+ with bc2:
91
+ if st.button('Show Logs'):
92
+ show_modal()
93
+
94
+ # st.divider()
95
+ # st.markdown(
96
+ # "## How this works?\n"
97
+ # "This app was built with [Vectara](https://vectara.com).\n\n"
98
+ # "It demonstrates the use of Agentic RAG functionality with Vectara"
99
+ # )
100
+
101
+ if "messages" not in st.session_state.keys():
102
+ reset()
103
 
104
  # Display chat messages
105
  for message in st.session_state.messages:
 
106
  with st.chat_message(message["role"], avatar=message["avatar"]):
107
  st.write(message["content"])
108
 
109
+ example_container = st.empty()
110
+ with example_container:
111
+ if show_example_questions():
112
+ example_container.empty()
113
+ st.session_state.first_turn = False
114
+ st.rerun()
115
 
116
  # User-provided prompt
117
  if st.session_state.ex_prompt:
 
132
  if st.session_state.prompt:
133
  with st.chat_message("assistant", avatar='🤖'):
134
  st.session_state.status = st.status('Processing...', expanded=False)
135
+ response = st.session_state.agent.chat(st.session_state.prompt)
136
+ res = escape_dollars_outside_latex(response.response)
 
137
  message = {"role": "assistant", "content": res, "avatar": '🤖'}
138
  st.session_state.messages.append(message)
139
  st.markdown(res)
140
 
141
+ send_amplitude_data(
142
+ user_query=st.session_state.messages[-2]["content"],
143
+ bot_response=st.session_state.messages[-1]["content"],
144
+ demo_name=cfg['demo_name']
145
+ )
146
 
147
  st.session_state.ex_prompt = None
148
  st.session_state.prompt = None
149
  st.session_state.first_turn = False
150
  st.rerun()
151
 
152
+ # Record user feedback
153
+ if (st.session_state.messages[-1]["role"] == "assistant") & (st.session_state.messages[-1]["content"] != initial_prompt):
154
+ if "feedback_key" not in st.session_state:
155
+ st.session_state.feedback_key = 0
156
+ streamlit_feedback(
157
+ feedback_type="thumbs", on_submit=thumbs_feedback, key=str(st.session_state.feedback_key),
158
+ kwargs={"user_query": st.session_state.messages[-2]["content"],
159
+ "bot_response": st.session_state.messages[-1]["content"],
160
+ "demo_name": cfg["demo_name"]}
161
+ )
162
+
163
+
164
+ sys.stdout.flush()