Spaces:
Running
Running
Refactor Dockerfile to create cache directory and update DocRetriever to filter by user_id
Browse files
- Dockerfile +0 -2
- app/retriever/__init__.py +5 -6
Dockerfile
CHANGED
@@ -8,9 +8,7 @@ COPY . .
|
|
8 |
WORKDIR /app
|
9 |
|
10 |
# Create a cache directory and set permissions
|
11 |
-
# RUN chmod -R 777 /app
|
12 |
RUN mkdir -p /app/cache && chmod -R 777 /app/cache
|
13 |
-
# RUN mkdir -p /app/nltk_data && chmod -R 777 /app/nltk_data
|
14 |
|
15 |
# Install requirements.txt
|
16 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
|
|
8 |
WORKDIR /app
|
9 |
|
10 |
# Create a cache directory and set permissions
|
|
|
11 |
RUN mkdir -p /app/cache && chmod -R 777 /app/cache
|
|
|
12 |
|
13 |
# Install requirements.txt
|
14 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
app/retriever/__init__.py
CHANGED
@@ -26,16 +26,14 @@ class DocRetriever(BaseRetriever):
|
|
26 |
|
27 |
def __init__(self, req, k: int = 10) -> None:
|
28 |
super().__init__()
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
# if req.id != []:
|
33 |
-
# _filter.update({"id": {"$in": req.id}})
|
34 |
self.retriever = vectorstore.as_retriever(
|
35 |
search_type='similarity',
|
36 |
search_kwargs={
|
37 |
"k": k,
|
38 |
-
|
39 |
# "score_threshold": .1
|
40 |
}
|
41 |
)
|
@@ -44,6 +42,7 @@ class DocRetriever(BaseRetriever):
|
|
44 |
try:
|
45 |
retrieved_docs = self.retriever.invoke(query)
|
46 |
# doc_lst = []
|
|
|
47 |
for doc in retrieved_docs:
|
48 |
doc.metadata['id'] = doc.id
|
49 |
# date = str(doc.metadata['publishDate'])
|
|
|
26 |
|
27 |
def __init__(self, req, k: int = 10) -> None:
|
28 |
super().__init__()
|
29 |
+
_filter={}
|
30 |
+
_filter.update({"user_id": req.user_id})
|
31 |
+
print(_filter)
|
|
|
|
|
32 |
self.retriever = vectorstore.as_retriever(
|
33 |
search_type='similarity',
|
34 |
search_kwargs={
|
35 |
"k": k,
|
36 |
+
"filter": _filter,
|
37 |
# "score_threshold": .1
|
38 |
}
|
39 |
)
|
|
|
42 |
try:
|
43 |
retrieved_docs = self.retriever.invoke(query)
|
44 |
# doc_lst = []
|
45 |
+
print(retrieved_docs)
|
46 |
for doc in retrieved_docs:
|
47 |
doc.metadata['id'] = doc.id
|
48 |
# date = str(doc.metadata['publishDate'])
|