rahuln2002 committed on
Commit
601bccc
·
verified ·
1 Parent(s): aa1939f

Update knowledgeassistant/components/RAG.py

Browse files
Files changed (1) hide show
  1. knowledgeassistant/components/RAG.py +91 -91
knowledgeassistant/components/RAG.py CHANGED
@@ -1,92 +1,92 @@
1
- from knowledgeassistant.logging.logger import logging
2
- from knowledgeassistant.exception.exception import KnowledgeAssistantException
3
-
4
- from knowledgeassistant.entity.config_entity import RAGConfig
5
- from knowledgeassistant.utils.main_utils.utils import read_txt_file, write_txt_file
6
-
7
- import os
8
- import sys
9
- from langchain.text_splitter import RecursiveCharacterTextSplitter
10
- from langchain_core.documents import Document
11
- from langchain_ollama import OllamaEmbeddings
12
- from langchain_community.vectorstores import FAISS
13
- from together import Together
14
- from langchain.chains import RetrievalQA
15
- from langchain_core.language_models import LLM
16
-
17
- from dotenv import load_dotenv
18
- import typing
19
-
20
- load_dotenv()
21
- os.environ["TOGETHER_API_KEY"] = os.getenv("TOGETHER_API_KEY")
22
-
23
- class RAG:
24
- def __init__(self, rag_config: RAGConfig):
25
- try:
26
- self.rag_config = rag_config
27
- except Exception as e:
28
- raise KnowledgeAssistantException(e, sys)
29
-
30
- def split_text(self, input_text_path: str):
31
- try:
32
- text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
33
- raw_documents = text_splitter.split_text(text = read_txt_file(file_path = input_text_path))
34
- documents = [Document(page_content=text) for text in raw_documents]
35
- return documents
36
- except Exception as e:
37
- raise KnowledgeAssistantException(e, sys)
38
-
39
- def create_and_store_embeddings(self, documents: list):
40
- try:
41
- db = FAISS.from_documents(documents, OllamaEmbeddings(model="nomic-embed-text"))
42
- return db
43
- except Exception as e:
44
- raise KnowledgeAssistantException(e, sys)
45
-
46
- class TogetherLLM(LLM):
47
- model_name: str = "meta-llama/Llama-3-8b-chat-hf"
48
-
49
- @property
50
- def _llm_type(self) -> str:
51
- return "together_ai"
52
-
53
- def _call(self, prompt: str, stop: typing.Optional[typing.List[str]] = None) -> str:
54
- client = Together()
55
- response = client.chat.completions.create(
56
- model=self.model_name,
57
- messages=[{"role": "user", "content": prompt}],
58
- )
59
- return response.choices[0].message.content
60
-
61
- def retrieval(self, llm, db, query):
62
- try:
63
- chain = RetrievalQA.from_chain_type(
64
- llm=llm,
65
- retriever=db.as_retriever()
66
- )
67
- result = chain.invoke(query)
68
- return result
69
- except Exception as e:
70
- raise KnowledgeAssistantException(e, sys)
71
-
72
- def initiate_rag(self, input_text_path: str, query: str):
73
- try:
74
- docs = self.split_text(input_text_path = input_text_path)
75
- logging.info("Splitted Text into Chunks Successfully")
76
- store = self.create_and_store_embeddings(documents = docs)
77
- logging.info("Successfully stored vector embeddings")
78
- llm = self.TogetherLLM()
79
- logging.info("Successfully loaded the llm")
80
- result = self.retrieval(
81
- llm = llm,
82
- db = store,
83
- query = query
84
- )
85
- logging.info("Successfully Generated Results")
86
- write_txt_file(
87
- file_path = self.rag_config.rag_generated_text_path,
88
- content = result['result']
89
- )
90
- logging.info("Successfully wrote results in txt file")
91
- except Exception as e:
92
  raise KnowledgeAssistantException(e, sys)
 
1
+ from knowledgeassistant.logging.logger import logging
2
+ from knowledgeassistant.exception.exception import KnowledgeAssistantException
3
+
4
+ from knowledgeassistant.entity.config_entity import RAGConfig
5
+ from knowledgeassistant.utils.main_utils.utils import read_txt_file, write_txt_file
6
+
7
+ import os
8
+ import sys
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_core.documents import Document
11
+ from langchain_community.embeddings import HuggingFaceEmbeddings
12
+ from langchain_community.vectorstores import FAISS
13
+ from together import Together
14
+ from langchain.chains import RetrievalQA
15
+ from langchain_core.language_models import LLM
16
+
17
+ from dotenv import load_dotenv
18
+ import typing
19
+
20
+ load_dotenv()
21
+ os.environ["TOGETHER_API_KEY"] = os.getenv("TOGETHER_API_KEY")
22
+
23
+ class RAG:
24
+ def __init__(self, rag_config: RAGConfig):
25
+ try:
26
+ self.rag_config = rag_config
27
+ except Exception as e:
28
+ raise KnowledgeAssistantException(e, sys)
29
+
30
+ def split_text(self, input_text_path: str):
31
+ try:
32
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
33
+ raw_documents = text_splitter.split_text(text = read_txt_file(file_path = input_text_path))
34
+ documents = [Document(page_content=text) for text in raw_documents]
35
+ return documents
36
+ except Exception as e:
37
+ raise KnowledgeAssistantException(e, sys)
38
+
39
+ def create_and_store_embeddings(self, documents: list):
40
+ try:
41
+ db = FAISS.from_documents(documents, HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))
42
+ return db
43
+ except Exception as e:
44
+ raise KnowledgeAssistantException(e, sys)
45
+
46
+ class TogetherLLM(LLM):
47
+ model_name: str = "meta-llama/Llama-3-8b-chat-hf"
48
+
49
+ @property
50
+ def _llm_type(self) -> str:
51
+ return "together_ai"
52
+
53
+ def _call(self, prompt: str, stop: typing.Optional[typing.List[str]] = None) -> str:
54
+ client = Together()
55
+ response = client.chat.completions.create(
56
+ model=self.model_name,
57
+ messages=[{"role": "user", "content": prompt}],
58
+ )
59
+ return response.choices[0].message.content
60
+
61
+ def retrieval(self, llm, db, query):
62
+ try:
63
+ chain = RetrievalQA.from_chain_type(
64
+ llm=llm,
65
+ retriever=db.as_retriever()
66
+ )
67
+ result = chain.invoke(query)
68
+ return result
69
+ except Exception as e:
70
+ raise KnowledgeAssistantException(e, sys)
71
+
72
+ def initiate_rag(self, input_text_path: str, query: str):
73
+ try:
74
+ docs = self.split_text(input_text_path = input_text_path)
75
+ logging.info("Splitted Text into Chunks Successfully")
76
+ store = self.create_and_store_embeddings(documents = docs)
77
+ logging.info("Successfully stored vector embeddings")
78
+ llm = self.TogetherLLM()
79
+ logging.info("Successfully loaded the llm")
80
+ result = self.retrieval(
81
+ llm = llm,
82
+ db = store,
83
+ query = query
84
+ )
85
+ logging.info("Successfully Generated Results")
86
+ write_txt_file(
87
+ file_path = self.rag_config.rag_generated_text_path,
88
+ content = result['result']
89
+ )
90
+ logging.info("Successfully wrote results in txt file")
91
+ except Exception as e:
92
  raise KnowledgeAssistantException(e, sys)