Spaces:

Lhumpal
/

beast-llm

Sleeping

Lhumpal committed on Mar 27

Commit

fc9568c

verified ·

1 Parent(s): e54b124

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,17 +36,18 @@ def build_faiss_vectorstore(chunks):
     print(f"Total number of documents: {num_documents}")
     return vectorstore
 # Function to retrieve similar text
 def retrieve(query, vectorstore, top_k=5):
     docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
-    # Correct handling of numpy float scores:
-    fixed_results = []
-    for doc, score in docs_and_scores:
-        if isinstance(score, np.floating):
-            score = float(score)  # Convert numpy float to standard Python float
-        fixed_results.append((doc, score))
-    return fixed_results
 class ChatRequest(BaseModel):
     message: str
@@ -65,6 +66,9 @@ class ChatRequest(BaseModel):
 dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
 concise_text = dataset["concise"]["text"]
 concise_text_string = "".join(concise_text)
 # Chunk and index the documents
 chunks = chunk_text(concise_text_string, chunk_size=450)

     print(f"Total number of documents: {num_documents}")
     return vectorstore
+# Function to retrieve similar text
 # Function to retrieve similar text
 def retrieve(query, vectorstore, top_k=5):
     docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
+    # Return (page_content, score) for docs that meet the score threshold
+    return [
+        (doc.page_content, float(score))  # Ensure score is a standard float
+        for doc, score in docs_and_scores
+        if float(score) >= 0.75
+    ]
 class ChatRequest(BaseModel):
     message: str
 dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
 concise_text = dataset["concise"]["text"]
 concise_text_string = "".join(concise_text)
+sample = "Big bucks like to bed in the tall grass and shade in the summer."
+concise_text_string += sample
 # Chunk and index the documents
 chunks = chunk_text(concise_text_string, chunk_size=450)