angry-meow committed on
Commit
7ea1b4c
·
2 Parent(s): 1cbf254 908a486

start of some start of agents work

Browse files
Files changed (2) hide show
  1. app.py +68 -31
  2. helper_functions.py +42 -12
app.py CHANGED
@@ -12,7 +12,7 @@ from langchain_community.document_loaders import PyMuPDFLoader
12
  import uuid
13
  import chainlit as cl
14
  import os
15
- from helper_functions import process_file, add_to_qdrant
16
 
17
  chat_model = ChatOpenAI(model="gpt-4o-mini")
18
  te3_small = OpenAIEmbeddings(model="text-embedding-3-small")
@@ -38,24 +38,86 @@ chat_prompt = ChatPromptTemplate.from_messages([("system", rag_system_prompt_tem
38
  @cl.on_chat_start
39
  async def on_chat_start():
40
  qdrant_client = QdrantClient(url=os.environ["QDRANT_ENDPOINT"], api_key=os.environ["QDRANT_API_KEY"])
 
41
  qdrant_store = Qdrant(
42
  client=qdrant_client,
43
  collection_name="kai_test_docs",
44
  embeddings=te3_small
45
  )
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  res = await cl.AskActionMessage(
48
  content="Pick an action!",
49
  actions=[
50
  cl.Action(name="Question", value="question", label="Ask a question"),
51
- cl.Action(name="File", value="file", label="Upload a file or URL"),
 
52
  ],
53
  ).send()
 
54
 
 
55
  if res and res.get("value") == "file":
56
  files = None
57
  files = await cl.AskFileMessage(
58
- content="Please upload a URL, Text, PDF file to begin!",
59
  accept=["text/plain", "application/pdf"],
60
  max_size_mb=12,
61
  ).send()
@@ -82,33 +144,8 @@ async def on_chat_start():
82
  msg.content = f"Processing `{file.name}` done. You can now ask questions!"
83
  await msg.update()
84
 
 
 
 
85
  if res and res.get("value") == "question":
86
  await cl.Message(content="Ask away!").send()
87
-
88
- # Load the style guide from the local file system
89
- style_guide_path = "./public/CoExperiences Writing Style Guide V1 (2024).pdf"
90
- loader = PyMuPDFLoader(style_guide_path)
91
- style_guide_docs = loader.load()
92
- style_guide_text = "\n".join([doc.page_content for doc in style_guide_docs])
93
-
94
- retriever = qdrant_store.as_retriever()
95
- global retrieval_augmented_qa_chain
96
- retrieval_augmented_qa_chain = (
97
- {
98
- "context": itemgetter("question") | retriever,
99
- "question": itemgetter("question"),
100
- "writing_style_guide": lambda _: style_guide_text
101
- }
102
- | RunnablePassthrough.assign(context=itemgetter("context"))
103
- | chat_prompt
104
- | chat_model
105
- )
106
-
107
- @cl.author_rename
108
- def rename(orig_author: str):
109
- return "AI Assistant"
110
-
111
- @cl.on_message
112
- async def main(message: cl.Message):
113
- response = retrieval_augmented_qa_chain.invoke({"question": message.content})
114
- await cl.Message(content=response.content).send()
 
12
  import uuid
13
  import chainlit as cl
14
  import os
15
+ from helper_functions import process_file, load_documents_from_url, add_to_qdrant
16
 
17
  chat_model = ChatOpenAI(model="gpt-4o-mini")
18
  te3_small = OpenAIEmbeddings(model="text-embedding-3-small")
 
38
@cl.on_chat_start
async def on_chat_start():
    """Set up the vector store and RAG chain for a new chat session.

    Connects to Qdrant, publishes the store as a module-level global,
    prompts the user for their first action, then builds the
    retrieval-augmented QA chain (also global, consumed by `main`).
    """
    client = QdrantClient(
        url=os.environ["QDRANT_ENDPOINT"],
        api_key=os.environ["QDRANT_API_KEY"],
    )

    global qdrant_store
    qdrant_store = Qdrant(
        client=client,
        collection_name="kai_test_docs",
        embeddings=te3_small,
    )

    # Ask the user what they want to do and act on the answer.
    res = await ask_action()
    await handle_response(res)

    # Load the writing style guide bundled with the app from local disk.
    style_guide_docs = PyMuPDFLoader(
        "./public/CoExperiences Writing Style Guide V1 (2024).pdf"
    ).load()
    style_guide_text = "\n".join(doc.page_content for doc in style_guide_docs)

    global retrieval_augmented_qa_chain
    retrieval_augmented_qa_chain = (
        {
            "context": itemgetter("question") | qdrant_store.as_retriever(),
            "question": itemgetter("question"),
            # The style guide is injected as a constant into every prompt.
            "writing_style_guide": lambda _: style_guide_text,
        }
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | chat_prompt
        | chat_model
    )
69
+
70
@cl.author_rename
def rename(orig_author: str) -> str:
    """Display every message author in the UI under a single assistant name."""
    return "AI Assistant"
73
+
74
@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming chat message.

    Messages that look like URLs are ingested into the Qdrant store;
    anything else is answered through the RAG chain. After either path,
    the user is re-prompted for their next action.
    """
    # str.startswith accepts a tuple — one call instead of an `or` chain.
    if message.content.startswith(("http://", "https://")):
        docs = load_documents_from_url(message.content)
        if not docs:
            # load_documents_from_url returns None for videos, YouTube
            # links, and any fetch/load error — report instead of crashing.
            await cl.Message(
                f"Could not load `{message.content}`. Please try another URL."
            ).send()
        else:
            splits = text_splitter.split_documents(docs)
            for i, doc in enumerate(splits):
                doc.metadata["user_upload_source"] = f"source_{i}"
            print(f"Processing {len(docs)} text chunks")

            # Add to the qdrant_store
            qdrant_store.add_documents(documents=splits)

            # Bug fix: the original interpolated `response.url`, but no
            # `response` exists in this branch (NameError at runtime).
            await cl.Message(
                f"Processing `{message.content}` done. You can now ask questions!"
            ).send()
    else:
        response = retrieval_augmented_qa_chain.invoke({"question": message.content})
        await cl.Message(content=response.content).send()

    # Re-prompt for the next action after every message.
    res = await ask_action()
    await handle_response(res)
102
+
103
+
104
+ ## Chainlit helper functions
105
async def ask_action():
    """Show the three entry-point actions and return the user's choice.

    Returns the Chainlit action-message result (None when the prompt
    times out without a selection).
    """
    choices = [
        cl.Action(name="Question", value="question", label="Ask a question"),
        cl.Action(name="File", value="file", label="Upload a file"),
        cl.Action(name="Url", value="url", label="Upload a URL"),
    ]
    return await cl.AskActionMessage(
        content="Pick an action!",
        actions=choices,
    ).send()
115
 
116
+ async def handle_response(res):
117
  if res and res.get("value") == "file":
118
  files = None
119
  files = await cl.AskFileMessage(
120
+ content="Please upload a Text or PDF file to begin!",
121
  accept=["text/plain", "application/pdf"],
122
  max_size_mb=12,
123
  ).send()
 
144
  msg.content = f"Processing `{file.name}` done. You can now ask questions!"
145
  await msg.update()
146
 
147
+ if res and res.get("value") == "url":
148
+ await cl.Message(content="Submit a url link in the message box below.").send()
149
+
150
  if res and res.get("value") == "question":
151
  await cl.Message(content="Ask away!").send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper_functions.py CHANGED
@@ -1,28 +1,23 @@
1
  from typing import List
2
- from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader
3
  from langchain.agents import AgentExecutor, create_openai_functions_agent
 
4
  from langchain_community.vectorstores import Qdrant
5
  from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
6
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
7
  from langchain_core.language_models import BaseLanguageModel
8
  import os
9
  import functools
 
10
 
11
- def process_file(file_or_url):
12
- if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
13
- # Handle URL
14
- loader = WebBaseLoader(file_or_url)
15
- docs = loader.load()
16
- documents.extend(docs)
17
  # save the file temporarily
18
- temp_file = "./"+file_or_url.path
19
  with open(temp_file, "wb") as file:
20
- file.write(file_or_url.content)
21
- file_name = file_or_url.name
22
 
23
  documents = []
24
- if file_or_url.path.endswith(".pdf"):
25
- loader = PyMuPDFLoader(temp_file)
26
  docs = loader.load()
27
  documents.extend(docs)
28
  else:
@@ -31,6 +26,41 @@ def process_file(file_or_url):
31
  documents.extend(docs)
32
  return documents
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
36
  Qdrant.from_documents(
 
1
  from typing import List
 
2
  from langchain.agents import AgentExecutor, create_openai_functions_agent
3
+ from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, UnstructuredURLLoader, WebBaseLoader
4
  from langchain_community.vectorstores import Qdrant
5
  from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
6
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
7
  from langchain_core.language_models import BaseLanguageModel
8
  import os
9
  import functools
10
+ import requests
11
 
12
def process_file(file):
    """Persist an uploaded file to disk and load it into documents.

    Args:
        file: an uploaded file object exposing ``path`` and ``content``
            (presumably a Chainlit upload — TODO confirm the exact API).

    Returns:
        list: documents produced by the appropriate langchain loader
        (PyMuPDFLoader for PDFs, TextLoader otherwise).
    """
    # Save the file temporarily. Use a distinct name for the write handle:
    # the original `with open(...) as file:` shadowed the parameter, so
    # `file.content` and the later `file.path` hit the (closed) handle.
    temp_file = "./" + file.path
    with open(temp_file, "wb") as out:
        out.write(file.content)

    documents = []
    if file.path.endswith(".pdf"):
        # Bug fix: `PyMuPDF` is not a loader class; the import is PyMuPDFLoader.
        loader = PyMuPDFLoader(temp_file)
    else:
        # NOTE(review): else-branch loader inferred from the file's imports
        # (TextLoader) — the diff hid this line; verify against the repo.
        loader = TextLoader(temp_file)
    documents.extend(loader.load())
    return documents
28
 
29
def load_documents_from_url(url):
    """Load documents from a URL, choosing a loader by content type.

    Args:
        url: the address to load; ``.pdf`` URLs go through PyMuPDFLoader,
            everything else through UnstructuredURLLoader.

    Returns:
        list | None: loaded documents, or None when the URL is video
        content, a YouTube link, or any fetch/load step fails. Errors are
        printed and swallowed — this function never raises.
    """
    # PDFs are loaded directly from the URL.
    if url.endswith(".pdf"):
        try:
            return PyMuPDFLoader(url).load()
        except Exception as e:
            print(f"Error loading PDF from {url}: {e}")
            return None

    # Skip YouTube links before spending a network round trip on them
    # (the original only checked this after the HEAD request).
    if 'youtube' in url:
        return None

    # HEAD request sniffs the content type without downloading the body.
    try:
        response = requests.head(url, timeout=10)  # Timeout for fetching headers
        content_type = response.headers.get('Content-Type', '')
    except Exception as e:
        print(f"Error fetching headers from {url}: {e}")
        return None

    # Ignore video content (flagged for now).
    if 'video' in content_type:
        return None

    # Otherwise, treat it as an HTML page.
    try:
        return UnstructuredURLLoader([url]).load()
    except Exception as e:
        print(f"Error loading HTML from {url}: {e}")
        return None
64
 
65
  def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
66
  Qdrant.from_documents(