Update app.py
Browse files
app.py
CHANGED
@@ -36,17 +36,18 @@ def build_faiss_vectorstore(chunks):
|
|
36 |
print(f"Total number of documents: {num_documents}")
|
37 |
return vectorstore
|
38 |
|
|
|
39 |
# Function to retrieve similar text
|
40 |
def retrieve(query, vectorstore, top_k=5):
|
41 |
docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
|
42 |
|
43 |
-
#
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
|
51 |
class ChatRequest(BaseModel):
|
52 |
message: str
|
@@ -65,6 +66,9 @@ class ChatRequest(BaseModel):
|
|
65 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
66 |
concise_text = dataset["concise"]["text"]
|
67 |
concise_text_string = "".join(concise_text)
|
|
|
|
|
|
|
68 |
|
69 |
# Chunk and index the documents
|
70 |
chunks = chunk_text(concise_text_string, chunk_size=450)
|
|
|
36 |
print(f"Total number of documents: {num_documents}")
|
37 |
return vectorstore
|
38 |
|
39 |
+
# Function to retrieve similar text
|
40 |
# Function to retrieve similar text
|
41 |
def retrieve(query, vectorstore, top_k=5, score_threshold=0.75):
    """Return retrieved passages whose score meets ``score_threshold``.

    Args:
        query: Query text handed to the vector store search.
        vectorstore: Object exposing
            ``similarity_search_with_score(query=..., k=...)`` that yields
            ``(document, score)`` pairs; each document must have a
            ``page_content`` attribute.
        top_k: Number of candidates requested from the store.
        score_threshold: Minimum score a candidate needs to be kept.
            Defaults to 0.75, the cutoff that was previously hard-coded.

    Returns:
        A list of ``(page_content, score)`` tuples in the order the store
        returned them, with every score converted to a built-in ``float``.
        The list is empty when no candidate meets the threshold.
    """
    docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)

    # NOTE(review): this keeps candidates with *higher* scores. If the store
    # reports a distance (lower = more similar) rather than a similarity,
    # the comparison below is inverted -- confirm against how the index is
    # built before relying on this filter.
    scored = (
        (doc.page_content, float(score))  # Ensure score is a standard float
        for doc, score in docs_and_scores
    )
    return [(text, score) for text, score in scored if score >= score_threshold]
|
50 |
+
|
51 |
|
52 |
class ChatRequest(BaseModel):
|
53 |
message: str
|
|
|
66 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
67 |
concise_text = dataset["concise"]["text"]
|
68 |
concise_text_string = "".join(concise_text)
|
69 |
+
sample = "Big bucks like to bed in the tall grass and shade in the summer."
|
70 |
+
concise_text_string += sample
|
71 |
+
|
72 |
|
73 |
# Chunk and index the documents
|
74 |
chunks = chunk_text(concise_text_string, chunk_size=450)
|