Technocoloredgeek committed on
Commit
213aef6
·
verified ·
1 Parent(s): b7a406f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_community.document_loaders import PyMuPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
6
+ from langchain_community.vectorstores import Qdrant
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from qdrant_client import QdrantClient
11
+
12
# Set up API keys: export the OpenAI key kept in Streamlit's secrets as an
# environment variable for the rest of the process.
os.environ.update(OPENAI_API_KEY=st.secrets["OPENAI_API_KEY"])

# PDF links: the source documents that setup_vectorstore() hands to
# load_and_process_pdfs() for loading and chunking.
pdf_links = [
    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
]
20
+
21
+ # Load and process PDFs
22
@st.cache_resource
def load_and_process_pdfs(pdf_links):
    """Load every PDF in ``pdf_links`` and split the result into text chunks.

    Args:
        pdf_links: Iterable of PDF locations, each passed to ``PyMuPDFLoader``
            as its ``file_path``.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_documents``
        over all loaded documents, in the order the links were given.
    """
    # Chunks are measured with len(): up to 500 units each, 40 shared between
    # neighbouring chunks, splitting on literal (non-regex) separators.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=40,
        length_function=len,
        is_separator_regex=False,
    )

    # Flatten the per-PDF document lists into a single list before splitting.
    loaded = [
        doc
        for url in pdf_links
        for doc in PyMuPDFLoader(file_path=url).load()
    ]

    return splitter.split_documents(loaded)
37
+
38
+ # Set up Qdrant and embeddings
39
@st.cache_resource
def setup_vectorstore():
    """Build the in-memory Qdrant vector store populated with the PDF chunks.

    The chunks come from ``load_and_process_pdfs(pdf_links)`` and are embedded
    with ``OpenAIEmbeddings``. The function is wrapped in
    ``st.cache_resource``, so the store is built once and then reused.

    Returns:
        A ``Qdrant`` vector store backed by an in-memory Qdrant instance whose
        ``AI_Ethics_Framework`` collection contains the embedded chunks.
    """
    COLLECTION_NAME = "AI_Ethics_Framework"

    embeddings = OpenAIEmbeddings()
    documents = load_and_process_pdfs(pdf_links)

    # A fresh ":memory:" Qdrant instance has no collections, and adding
    # documents to a collection that was never created fails. from_documents()
    # creates the collection (sized from the embedding output) and inserts the
    # documents in a single step, so no separate existence check is needed.
    return Qdrant.from_documents(
        documents,
        embeddings,
        location=":memory:",
        collection_name=COLLECTION_NAME,
    )
58
+
59
+ # Create RAG pipeline
60
+ @st.cache_resource
61
+ def create_rag_pipeline(vector_store):
62
+ retriever = vector_store.as_retriever()
63
+
64
+ template = """
65
+ You are an expert AI assistant with deep knowledge of business, technology, and entrepreneurship. Your task is to provide accurate, insightful answers based solely on the given context. Follow these guidelines:
66
+
67
+ 1. Analyze the question carefully to understand the core information being sought.
68
+ 2. Thoroughly examine the provided context, identifying key relevant information.
69
+ 3. Formulate a clear, concise answer that directly addresses the question.
70
+ 4. Use specific details and examples from the context to support your answer.
71
+ 5. If the context doesn't contain sufficient information to fully answer the question, state this clearly and say,'I don't know'.
72
+ 6. Do not introduce any information not present in the context.
73
+ 7. If asked for an opinion or recommendation, base it strictly on insights from the context.
74
+ 8. Use