Lhumpal committed on
Commit
fc9568c
·
verified ·
1 Parent(s): e54b124

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -36,17 +36,18 @@ def build_faiss_vectorstore(chunks):
36
  print(f"Total number of documents: {num_documents}")
37
  return vectorstore
38
 
 
39
  # Function to retrieve similar text
40
  def retrieve(query, vectorstore, top_k=5):
41
  docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
42
 
43
- # Correct handling of numpy float scores:
44
- fixed_results = []
45
- for doc, score in docs_and_scores:
46
- if isinstance(score, np.floating):
47
- score = float(score) # Convert numpy float to standard Python float
48
- fixed_results.append((doc, score))
49
- return fixed_results
50
 
51
  class ChatRequest(BaseModel):
52
  message: str
@@ -65,6 +66,9 @@ class ChatRequest(BaseModel):
65
  dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
66
  concise_text = dataset["concise"]["text"]
67
  concise_text_string = "".join(concise_text)
 
 
 
68
 
69
  # Chunk and index the documents
70
  chunks = chunk_text(concise_text_string, chunk_size=450)
 
36
  print(f"Total number of documents: {num_documents}")
37
  return vectorstore
38
 
39
+ # Function to retrieve similar text
40
  # Function to retrieve similar text
41
  def retrieve(query, vectorstore, top_k=5):
42
  docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
43
 
44
+ # Return (page_content, score) for docs that meet the score threshold
45
+ return [
46
+ (doc.page_content, float(score)) # Ensure score is a standard float
47
+ for doc, score in docs_and_scores
48
+ if float(score) >= 0.75
49
+ ]
50
+
51
 
52
  class ChatRequest(BaseModel):
53
  message: str
 
66
  dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
67
  concise_text = dataset["concise"]["text"]
68
  concise_text_string = "".join(concise_text)
69
+ sample = "Big bucks like to bed in the tall grass and shade in the summer."
70
+ concise_text_string += sample
71
+
72
 
73
  # Chunk and index the documents
74
  chunks = chunk_text(concise_text_string, chunk_size=450)