Spaces:
Sleeping
Sleeping
updates to work with ANTHROPIC
Browse files
agent.py
CHANGED
@@ -30,11 +30,14 @@ prompt = """
|
|
30 |
$qResult.getText()
|
31 |
#end
|
32 |
#end
|
33 |
-
Generate a coherent response
|
34 |
Give a slight preference to search results that appear earlier in the list.
|
35 |
Include statistical and numerical evidence to support and contextualize your response.
|
|
|
|
|
36 |
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
|
37 |
-
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***.
|
|
|
38 |
]
|
39 |
"""
|
40 |
|
@@ -54,71 +57,48 @@ def create_assistant_tools(cfg):
|
|
54 |
Responds to a user question about a particular result, based on the publications.
|
55 |
""",
|
56 |
tool_args_schema = QueryPublicationsArgs,
|
57 |
-
# reranker = "multilingual_reranker_v1", rerank_k = 100,
|
58 |
reranker = "chain", rerank_k = 100,
|
59 |
rerank_chain = [
|
60 |
{
|
61 |
"type": "multilingual_reranker_v1",
|
62 |
-
|
63 |
},
|
64 |
{
|
65 |
"type": "mmr",
|
66 |
-
"diversity_bias": 0.
|
67 |
-
"limit":
|
68 |
}
|
69 |
],
|
70 |
-
n_sentences_before =
|
71 |
-
summary_num_results =
|
72 |
vectara_summarizer = summarizer,
|
73 |
include_citations = True,
|
74 |
-
vectara_prompt_text=prompt,
|
75 |
save_history = True,
|
76 |
-
verbose=
|
77 |
)
|
78 |
|
79 |
-
search_publications = vec_factory.create_search_tool(
|
80 |
-
tool_name = "search_publications",
|
81 |
-
tool_description = """
|
82 |
-
Returns matching publications to a user query.
|
83 |
-
""",
|
84 |
-
tool_args_schema = QueryPublicationsArgs,
|
85 |
-
reranker = "chain", rerank_k = 100,
|
86 |
-
rerank_chain = [
|
87 |
-
{
|
88 |
-
"type": "multilingual_reranker_v1",
|
89 |
-
# "cutoff": 0.2
|
90 |
-
},
|
91 |
-
{
|
92 |
-
"type": "mmr",
|
93 |
-
"diversity_bias": 0.2,
|
94 |
-
"limit": 50
|
95 |
-
}
|
96 |
-
],
|
97 |
-
# reranker = "multilingual_reranker_v1", rerank_k = 100,
|
98 |
-
n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
|
99 |
-
save_history = True,
|
100 |
-
verbose=True
|
101 |
-
)
|
102 |
-
|
103 |
-
|
104 |
tools_factory = ToolsFactory()
|
105 |
return (
|
106 |
tools_factory.standard_tools() +
|
107 |
-
[ask_publications
|
108 |
)
|
109 |
|
110 |
def initialize_agent(_cfg, agent_progress_callback=None):
|
111 |
menarini_bot_instructions = """
|
112 |
-
- You are an expert
|
113 |
-
- Your responses should be technically rigorous, data-driven, and written for an audience familiar with advanced statistical methodologies, regulatory standards,
|
|
|
|
|
114 |
- Call the ask_publications tool to retrieve information to answer the user query.
|
115 |
-
If the initial query lacks comprehensive data,
|
116 |
-
|
117 |
-
|
|
|
118 |
- When responding to queries:
|
119 |
1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
|
120 |
and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
|
121 |
-
2) Your responses must include contextual information
|
122 |
When considering or reporting sample sizes, consider participants who were eligible for the study, those who were randomized, and those who completed the study.
|
123 |
If it's unclear which one is being referred to, clarify this in your response or ask the user for clarification.
|
124 |
3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
|
|
|
30 |
$qResult.getText()
|
31 |
#end
|
32 |
#end
|
33 |
+
Generate a coherent response to the query *** $vectaraQuery *** using information and facts in the search results provided.
|
34 |
Give a slight preference to search results that appear earlier in the list.
|
35 |
Include statistical and numerical evidence to support and contextualize your response.
|
36 |
+
Your response should be comprehensive and include all relevant information from the search results. Do not omit any relevant information.
|
37 |
+
If multiple trials are mentioned in the search results, include all relevant trials in your response, and highlight the various trials included.
|
38 |
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
|
39 |
+
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***.
|
40 |
+
Respond always in the $vectaraLangName language, and only in that language."}
|
41 |
]
|
42 |
"""
|
43 |
|
|
|
57 |
Responds to a user question about a particular result, based on the publications.
|
58 |
""",
|
59 |
tool_args_schema = QueryPublicationsArgs,
|
|
|
60 |
reranker = "chain", rerank_k = 100,
|
61 |
rerank_chain = [
|
62 |
{
|
63 |
"type": "multilingual_reranker_v1",
|
64 |
+
"cutoff": 0.1
|
65 |
},
|
66 |
{
|
67 |
"type": "mmr",
|
68 |
+
"diversity_bias": 0.1,
|
69 |
+
"limit": 100
|
70 |
}
|
71 |
],
|
72 |
+
n_sentences_before = 3, n_sentences_after = 3, lambda_val = 0.005,
|
73 |
+
summary_num_results = 25,
|
74 |
vectara_summarizer = summarizer,
|
75 |
include_citations = True,
|
76 |
+
vectara_prompt_text = prompt,
|
77 |
save_history = True,
|
78 |
+
verbose = True
|
79 |
)
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
tools_factory = ToolsFactory()
|
82 |
return (
|
83 |
tools_factory.standard_tools() +
|
84 |
+
[ask_publications]
|
85 |
)
|
86 |
|
87 |
def initialize_agent(_cfg, agent_progress_callback=None):
|
88 |
menarini_bot_instructions = """
|
89 |
+
- You are an expert in clinical trial and statistical data analysis with extensive experience in designing, analyzing, and interpreting clinical research data.
|
90 |
+
- Your responses should be technically rigorous, data-driven, and written for an audience familiar with advanced statistical methodologies, regulatory standards,
|
91 |
+
and the nuances of clinical trial design.
|
92 |
+
- If asked about clinical trials, cover all relevant trials in your response (avoid aggregating results across trials, unless specifically asked to do so).
|
93 |
- Call the ask_publications tool to retrieve information to answer the user query.
|
94 |
+
If the initial query lacks comprehensive data, try to call ask_publications with a different or refined query until you retrieve all necessary numerical details.
|
95 |
+
You can try multiple times. You can include additional information about the context of the query (like clinical trial name) to get more relevant results.
|
96 |
+
- If you are missing information about a specific trial, or some data item, call the ask_publications tool with a specific query to retrieve the missing information.
|
97 |
+
- If ask_publications returns citations or references, include them in your response.
|
98 |
- When responding to queries:
|
99 |
1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
|
100 |
and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
|
101 |
+
2) Your responses must include contextual information like sample size (n) and population characteristics. This nuance is crucial in clinical trial analysis.
|
102 |
When considering or reporting sample sizes, consider participants who were eligible for the study, those who were randomized, and those who completed the study.
|
103 |
If it's unclear which one is being referred to, clarify this in your response or ask the user for clarification.
|
104 |
3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
|
app.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
from st_app import launch_bot
|
3 |
import uuid
|
4 |
|
5 |
import nest_asyncio
|
6 |
import asyncio
|
7 |
|
|
|
|
|
8 |
# Setup for HTTP API Calls to Amplitude Analytics
|
9 |
if 'device_id' not in st.session_state:
|
10 |
st.session_state.device_id = str(uuid.uuid4())
|
@@ -12,7 +15,11 @@ if 'device_id' not in st.session_state:
|
|
12 |
if "feedback_key" not in st.session_state:
|
13 |
st.session_state.feedback_key = 0
|
14 |
|
|
|
|
|
|
|
15 |
if __name__ == "__main__":
|
16 |
st.set_page_config(page_title="Menarini Assistant", layout="wide")
|
|
|
17 |
nest_asyncio.apply()
|
18 |
-
asyncio.run(
|
|
|
1 |
import streamlit as st
|
2 |
+
import torch
|
3 |
from st_app import launch_bot
|
4 |
import uuid
|
5 |
|
6 |
import nest_asyncio
|
7 |
import asyncio
|
8 |
|
9 |
+
torch.classes.__path__ = []
|
10 |
+
|
11 |
# Setup for HTTP API Calls to Amplitude Analytics
|
12 |
if 'device_id' not in st.session_state:
|
13 |
st.session_state.device_id = str(uuid.uuid4())
|
|
|
15 |
if "feedback_key" not in st.session_state:
|
16 |
st.session_state.feedback_key = 0
|
17 |
|
18 |
+
async def main():
|
19 |
+
await launch_bot()
|
20 |
+
|
21 |
if __name__ == "__main__":
|
22 |
st.set_page_config(page_title="Menarini Assistant", layout="wide")
|
23 |
+
|
24 |
nest_asyncio.apply()
|
25 |
+
asyncio.run(main())
|
requirements.txt
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
omegaconf==2.3.0
|
2 |
python-dotenv==1.0.1
|
3 |
-
streamlit==1.
|
4 |
-
streamlit_pills==0.3.0
|
5 |
streamlit_feedback==0.1.3
|
6 |
uuid==1.30
|
7 |
langdetect==1.0.9
|
8 |
langcodes==3.4.0
|
9 |
-
vectara-agentic==0.2.
|
|
|
1 |
omegaconf==2.3.0
|
2 |
python-dotenv==1.0.1
|
3 |
+
streamlit==1.43.2
|
|
|
4 |
streamlit_feedback==0.1.3
|
5 |
uuid==1.30
|
6 |
langdetect==1.0.9
|
7 |
langcodes==3.4.0
|
8 |
+
vectara-agentic==0.2.8
|
st_app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
from PIL import Image
|
2 |
import sys
|
3 |
import re
|
4 |
|
|
|
|
|
1 |
import sys
|
2 |
import re
|
3 |
|