anubhav77 committed on
Commit
105ad74
·
1 Parent(s): ba0cd2e
Files changed (1) hide show
  1. src/chromaIntf.py +47 -1
src/chromaIntf.py CHANGED
@@ -5,6 +5,7 @@ from langchain.chains import RetrievalQA
5
  from langchain.embeddings import HuggingFaceBgeEmbeddings
6
  from langchain.retrievers.self_query.base import SelfQueryRetriever
7
  from langchain.chains.query_constructor.base import AttributeInfo
 
8
  from llm.llmFactory import LLMFactory
9
  from datetime import datetime
10
  import baseInfra.dropbox_handler as dbh
@@ -50,17 +51,61 @@ class ChromaIntf():
50
  persist_directory=self.persist_db_directory)
51
  #self.vectorstore._client.
52
 
 
 
 
 
 
 
 
 
 
53
  self.metadata_field_info = [
54
  AttributeInfo(
55
  name="timestamp",
56
- description="Python datetime.timestamp of the document in isoformat, can be used for getting date, year, month, time etc ",
57
  type="str",
58
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  AttributeInfo(
60
  name="source",
61
  description="Type of entry",
62
  type="string or list[string]",
63
  ),
 
 
 
 
 
 
 
 
 
 
64
  ]
65
  self.document_content_description = "Information to store for retrival from LLM based chatbot"
66
  lf=LLMFactory()
@@ -72,6 +117,7 @@ class ChromaIntf():
72
  self.vectorstore,
73
  self.document_content_description,
74
  self.metadata_field_info,
 
75
  verbose=True
76
  )
77
 
 
5
  from langchain.embeddings import HuggingFaceBgeEmbeddings
6
  from langchain.retrievers.self_query.base import SelfQueryRetriever
7
  from langchain.chains.query_constructor.base import AttributeInfo
8
+ from langchain.retrievers.self_query.chroma import ChromaTranslator
9
  from llm.llmFactory import LLMFactory
10
  from datetime import datetime
11
  import baseInfra.dropbox_handler as dbh
 
51
  persist_directory=self.persist_db_directory)
52
  #self.vectorstore._client.
53
 
54
+ # timestamp --> time when added
55
+ # source --> notes/references/web/youtube/book/conversation, default conversation
56
+ # title --> title of the document; will be "conversation" when source is conversation, default blank
57
+ # author --> will default to blank
58
+ # "Year": 2024,
59
+ #"Month": 1,
60
+ #"Day": 3,
61
+ #"Hour": 11,
62
+ #"Minute": 29
63
  self.metadata_field_info = [
64
  AttributeInfo(
65
  name="timestamp",
66
+ description="Python datetime.timestamp of the document in isoformat, should not be used for query",
67
  type="str",
68
  ),
69
+ AttributeInfo(
70
+ name="Year",
71
+ description="Year from the date when the entry was added in YYYY format",
72
+ type="int",
73
+ ),
74
+ AttributeInfo(
75
+ name="Month",
76
+ description="Month from the date when the entry was added it is from 1-12",
77
+ type="int",
78
+ ),
79
+ AttributeInfo(
80
+ name="Day",
81
+ description="Day of month from the date-time stamp when the entry was added, it is from 1-31",
82
+ type="int",
83
+ ),
84
+ AttributeInfo(
85
+ name="Hour",
86
+ description="Hour from the timestamp when the entry was added",
87
+ type="int",
88
+ ),
89
+ AttributeInfo(
90
+ name="Minute",
91
+ description="Minute from the timestamp when the entry was added",
92
+ type="int",
93
+ ),
94
  AttributeInfo(
95
  name="source",
96
  description="Type of entry",
97
  type="string or list[string]",
98
  ),
99
+ AttributeInfo(
100
+ name="title",
101
+ description="Title or Subject of the entry",
102
+ type="string",
103
+ ),
104
+ AttributeInfo(
105
+ name="author",
106
+ description="Author of the entry",
107
+ type="string",
108
+ )
109
  ]
110
  self.document_content_description = "Information to store for retrival from LLM based chatbot"
111
  lf=LLMFactory()
 
117
  self.vectorstore,
118
  self.document_content_description,
119
  self.metadata_field_info,
120
+ structured_query_translator=ChromaTranslator(),
121
  verbose=True
122
  )
123