Added copywriter mode
Files changed:
- copywriter.py       +77 -0
- requirements.txt     +1 -0
- search_agent.py     +40 -4
- search_agent_ui.py   +1 -1
- web_rag.py           +6 -5
copywriter.py
ADDED
@@ -0,0 +1,77 @@
+from langchain.schema import SystemMessage, HumanMessage
+from langchain.prompts.chat import (
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+    ChatPromptTemplate
+)
+from langchain.prompts.prompt import PromptTemplate
+
+
+
+def get_comments_prompt(query, draft):
+    system_message = SystemMessage(
+        content="""
+            You are an AI text reviewer with a keen eye for detail and a deep understanding of language, style, and grammar.
+            Your task is to refine and improve the draft content provided by the writers, offering advanced copyediting techniques and suggestions to enhance the overall quality of the text.
+            When a user submits a piece of writing, follow these steps:
+            1. Read the original query from the user so you understand clearly the request that was given to the writer.
+            2. Read through the draft text carefully, identifying areas that need improvement in terms of grammar, punctuation, spelling, syntax, and style.
+            3. Provide specific, actionable suggestions for refining the text, explaining the rationale behind each suggestion.
+            4. Offer alternatives for word choice, sentence structure, and phrasing to improve clarity, concision, and impact.
+            5. Ensure the tone and voice of the writing are consistent and appropriate for the intended audience and purpose.
+            6. Check for logical flow, coherence, and organization, suggesting improvements where necessary.
+            7. Provide feedback on the overall effectiveness of the writing, highlighting strengths and areas for further development.
+
+            Your suggestions should be constructive, insightful, and designed to help the user elevate the quality of their writing.
+            You never generate the corrected text by itself. *Only* give the comment.
+        """
+    )
+    human_message = HumanMessage(
+        content=f"""
+            Original query: {query}
+            ------------------------
+            Draft text: {draft}
+        """
+    )
+    return [system_message, human_message]
+
+
+def generate_comments(chat_llm, query, draft, callbacks=[]):
+    messages = get_comments_prompt(query, draft)
+    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
+    return response.content
+
+
+
+def get_final_text_prompt(query, draft, comments):
+    system_message = SystemMessage(
+        content="""
+            You are an AI copyeditor with a keen eye for detail and a deep understanding of language, style, and grammar.
+            Your role is to elevate the quality of the writing.
+            You are given:
+            1. The original query from the user
+            2. The draft text from the writer
+            3. The comments from the reviewer
+            Your task is to refine and improve the draft text taking into account the comments from the reviewer.
+            Output a fully edited version that takes into account the original query, the draft text, and the comments from the reviewer.
+            Keep the references of the draft untouched!
+        """
+    )
+    human_message = HumanMessage(
+        content=f"""
+            Original query: {query}
+            -------------------------------------
+            Draft text: {draft}
+            -------------------------------------
+            Comments from the reviewer: {comments}
+            -------------------------------------
+            Final text:
+        """
+    )
+    return [system_message, human_message]
+
+
+def generate_final_text(chat_llm, query, draft, comments, callbacks=[]):
+    messages = get_final_text_prompt(query, draft, comments)
+    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
+    return response.content
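For reference, a minimal sketch of how the two helpers chain together into the review-then-rewrite loop. The ChatOpenAI model and the query/draft strings are illustrative assumptions, not part of this commit:

    # Hypothetical usage sketch; assumes an OpenAI API key is configured.
    from langchain_openai import ChatOpenAI
    import copywriter as cw

    chat = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
    query = "Write a short product announcement"          # illustrative
    draft = "Our new tool make search easy and fast."     # illustrative

    comments = cw.generate_comments(chat, query, draft)                # reviewer pass
    final_text = cw.generate_final_text(chat, query, draft, comments)  # copyedit pass
    print(final_text)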
requirements.txt
CHANGED
@@ -15,6 +15,7 @@ langchain_experimental
 langchain_openai
 langchain_groq
 langsmith
+schema
 streamlit
 selenium
 rich
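The new `schema` dependency backs the argument validation that search_agent.py adds in commented-out form below. A minimal sketch of that validation, using a hand-written dict as a stand-in for docopt's output:

    # Sketch only: coerces and validates numeric CLI options.
    from schema import Schema, Use, SchemaError

    arguments = {"--max_pages": "10", "--temperature": "0.0"}  # stand-in for docopt(...)

    validator = Schema({
        "--max_pages": Use(int, error="--max_pages must be an integer"),
        "--temperature": Use(float, error="--temperature must be a float"),
    }, ignore_extra_keys=True)

    try:
        arguments = validator.validate(arguments)  # "10" -> 10, "0.0" -> 0.0
    except SchemaError as e:
        exit(e)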
search_agent.py
CHANGED
@@ -6,6 +6,7 @@ Usage:
     [--provider=provider]
     [--model=model]
     [--temperature=temp]
+    [--copywrite]
     [--max_pages=num]
     [--output=text]
     SEARCH_QUERY
@@ -14,6 +15,7 @@ Usage:
 Options:
   -h --help                        Show this screen.
   --version                        Show version.
+  -c --copywrite                   First produce a draft, review it, and rewrite it into a final text
   -d domain --domain=domain        Limit search to a specific domain
   -t temp --temperature=temp       Set the temperature of the LLM [default: 0.0]
   -p provider --provider=provider  Use a specific LLM (choices: bedrock,openai,groq,ollama,cohere,fireworks) [default: openai]
@@ -26,6 +28,7 @@ Options:
 import os
 
 from docopt import docopt
+#from schema import Schema, Use, SchemaError
 import dotenv
 
 from langchain.callbacks import LangChainTracer
@@ -37,6 +40,7 @@ from rich.markdown import Markdown
 
 import web_rag as wr
 import web_crawler as wc
+import copywriter as cw
 
 console = Console()
 dotenv.load_dotenv()
@@ -69,7 +73,18 @@ if os.getenv("LANGCHAIN_API_KEY"):
 
 if __name__ == '__main__':
     arguments = docopt(__doc__, version='Search Agent 0.1')
-
+
+    #schema = Schema({
+    #    '--max_pages': Use(int, error='--max_pages must be an integer'),
+    #    '--temperature': Use(float, error='--temperature must be a float'),
+    #})
+
+    #try:
+    #    arguments = schema.validate(arguments)
+    #except SchemaError as e:
+    #    exit(e)
+
+    copywrite_mode = arguments["--copywrite"]
     provider = arguments["--provider"]
     model = arguments["--model"]
     temperature = float(arguments["--temperature"])
@@ -101,11 +116,32 @@ if __name__ == '__main__':
     vector_store = wc.vectorize(contents, embedding_model)
 
     with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
-        …
+        draft = wr.query_rag(chat, query, optimize_search_query, vector_store, top_k = 5, callbacks=callbacks)
 
     console.rule(f"[bold green]Response from {provider}")
     if output == "text":
-        console.print(…
+        console.print(draft)
     else:
-        console.print(Markdown(…
+        console.print(Markdown(draft))
     console.rule("[bold green]")
+
+    if(copywrite_mode):
+        with console.status("[bold green]Getting comments from the reviewer", spinner="dots8Bit"):
+            comments = cw.generate_comments(chat, query, draft, callbacks=callbacks)
+
+        console.rule(f"[bold green]Response from reviewer")
+        if output == "text":
+            console.print(comments)
+        else:
+            console.print(Markdown(comments))
+        console.rule("[bold green]")
+
+        with console.status("[bold green]Writing the final text", spinner="dots8Bit"):
+            final_text = cw.generate_final_text(chat, query, draft, comments, callbacks=callbacks)
+
+        console.rule(f"[bold green]Final text")
+        if output == "text":
+            console.print(final_text)
+        else:
+            console.print(Markdown(final_text))
+        console.rule("[bold green]")
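With the flag wired through docopt, a single invocation now runs the whole draft, review, and final-text pipeline, e.g. (illustrative query):

    python search_agent.py --copywrite "Write a short article about local-first software"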
search_agent_ui.py
CHANGED
@@ -52,7 +52,7 @@ with st.sidebar:
     model_provider = st.selectbox("🧠 Model provider 🧠", st.session_state["providers"])
     temperature = st.slider("🌡️ Model temperature 🌡️", 0.0, 1.0, 0.1, help="The higher the more creative")
     max_pages = st.slider("🔍 Max pages to retrieve 🔍", 1, 20, 15, help="How many web pages to retrieve from the internet")
-    top_k_documents = st.slider("📄 How many …
+    top_k_documents = st.slider("📄 How many doc extracts to consider 📄", 1, 20, 5, help="How many of the top extracts to consider")
 
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
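The slider value presumably feeds the retrieval call the same way the CLI's hard-coded top_k = 5 does. A sketch under that assumption; chat, query, optimize_search_query, vector_store, and callbacks are stand-ins for objects the surrounding Streamlit app builds:

    # Assumed wiring; mirrors the query_rag call signature seen in search_agent.py.
    import web_rag as wr

    response = wr.query_rag(
        chat, query, optimize_search_query, vector_store,
        top_k=top_k_documents,   # slider value instead of the CLI's fixed 5
        callbacks=callbacks,
    )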
web_rag.py
CHANGED
@@ -47,12 +47,13 @@ def get_models(provider, model=None, temperature=0.0):
             chat_llm = BedrockChat(
                 credentials_profile_name=credentials_profile_name,
                 model_id=model,
-                model_kwargs={"temperature": temperature },
-            )
-            embedding_model = BedrockEmbeddings(
-                model_id='cohere.embed-multilingual-v3',
-                credentials_profile_name=credentials_profile_name
+                model_kwargs={"temperature": temperature, 'max_tokens': 8192 },
             )
+            #embedding_model = BedrockEmbeddings(
+            #    model_id='cohere.embed-multilingual-v3',
+            #    credentials_profile_name=credentials_profile_name
+            #)
+            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
         case 'openai':
             if model is None:
                 model = "gpt-3.5-turbo"
|