gavinzli committed on
Commit
7097576
·
1 Parent(s): 5a42f57

Refactor Dockerfile to create cache directory and update DocRetriever to filter by user_id

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -2
  2. app/retriever/__init__.py +5 -6
Dockerfile CHANGED
@@ -8,9 +8,7 @@ COPY . .
8
  WORKDIR /app
9
 
10
  # Create a cache directory and set permissions
11
- # RUN chmod -R 777 /app
12
  RUN mkdir -p /app/cache && chmod -R 777 /app/cache
13
- # RUN mkdir -p /app/nltk_data && chmod -R 777 /app/nltk_data
14
 
15
  # Install requirements.txt
16
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
8
  WORKDIR /app
9
 
10
  # Create a cache directory and set permissions
 
11
  RUN mkdir -p /app/cache && chmod -R 777 /app/cache
 
12
 
13
  # Install requirements.txt
14
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
app/retriever/__init__.py CHANGED
@@ -26,16 +26,14 @@ class DocRetriever(BaseRetriever):
26
 
27
  def __init__(self, req, k: int = 10) -> None:
28
  super().__init__()
29
- # _filter={}
30
- # if req.site != []:
31
- # _filter.update({"site": {"$in": req.site}})
32
- # if req.id != []:
33
- # _filter.update({"id": {"$in": req.id}})
34
  self.retriever = vectorstore.as_retriever(
35
  search_type='similarity',
36
  search_kwargs={
37
  "k": k,
38
- # "filter": _filter,
39
  # "score_threshold": .1
40
  }
41
  )
@@ -44,6 +42,7 @@ class DocRetriever(BaseRetriever):
44
  try:
45
  retrieved_docs = self.retriever.invoke(query)
46
  # doc_lst = []
 
47
  for doc in retrieved_docs:
48
  doc.metadata['id'] = doc.id
49
  # date = str(doc.metadata['publishDate'])
 
26
 
27
  def __init__(self, req, k: int = 10) -> None:
28
  super().__init__()
29
+ _filter={}
30
+ _filter.update({"user_id": req.user_id})
31
+ print(_filter)
 
 
32
  self.retriever = vectorstore.as_retriever(
33
  search_type='similarity',
34
  search_kwargs={
35
  "k": k,
36
+ "filter": _filter,
37
  # "score_threshold": .1
38
  }
39
  )
 
42
  try:
43
  retrieved_docs = self.retriever.invoke(query)
44
  # doc_lst = []
45
+ print(retrieved_docs)
46
  for doc in retrieved_docs:
47
  doc.metadata['id'] = doc.id
48
  # date = str(doc.metadata['publishDate'])