File size: 1,792 Bytes
f51bb92
 
d1afae8
f51bb92
 
ce9ef3e
f51bb92
 
f2daaee
9a7da99
f51bb92
6158da4
b83cc65
f0018f2
f51bb92
 
 
 
 
 
 
 
e5cd1d3
 
 
6158da4
6d056d5
db6b619
6d056d5
6158da4
6d056d5
f2daaee
 
 
d1afae8
 
dd677c3
f2daaee
 
7f989d6
f2daaee
f51bb92
6158da4
 
 
 
 
b83cc65
 
6158da4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Top-level settings: two log locations and the compute device.
# NOTE(review): the paths are relative ('../...'); presumably resolved against
# the working directory of the process that loads this file — confirm.
log_dir: '../storage/logs' # str
log_chunk_dir: '../storage/logs/chunks' # str
# Allowed values are listed in brackets.
device: 'cuda' # str [cuda, cpu]

# Vector store settings. Inline comments give each value's type and, in
# brackets, the accepted options.
vectorstore:
  embedd_files: false  # bool
  data_path: '../storage/data'  # str
  url_file_path: '../storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'FAISS'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: '../vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  faiss_params:  # Not used as of now
    index_path: '../vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    # NOTE(review): presumably has to match the output size of `model` above
    # — confirm before changing either one.
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int

  colbert_params:
    index_name: 'new_idx'  # str

# LLM settings. `llm_loader` names one of the two loaders listed in its
# comment; `openai_params` and `local_llm_params` hold the per-loader options.
llm_params:
  use_history: true  # bool
  memory_window: 3  # int
  llm_loader: 'openai'  # str [local_llm, openai]
  openai_params:
    model: 'gpt-3.5-turbo-1106'  # str [gpt-3.5-turbo-1106, gpt-4]
  local_llm_params:
    model: 'tiny-llama'  # str
    temperature: 0.7  # float
    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'  # HuggingFace repo id
    # Specific name of gguf file in the repo
    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'
  # NOTE(review): this key names a PDF reader but lives under `llm_params`;
  # left in place because moving it would change the key path — confirm with
  # the consumer before relocating.
  pdf_reader: 'llama'  # str [llama, pymupdf, gpt]

# Chat logging settings: an on/off flag and the platform name.
chat_logging:
  log_chat: false  # bool
  platform: 'literalai'  # str

# Text splitter settings: feature flags, chunk sizing, and the separator and
# delimiter lists.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # Double-quoted, so "\n" here is a real newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]  # list of strings
  front_chunks_to_remove: null  # int or null
  last_chunks_to_remove: null  # int or null
  # Single-quoted, so '\t' and '\n' are the two-character sequences
  # backslash+t / backslash+n, NOT control characters.
  # NOTE(review): presumably the consumer interprets these as patterns
  # (e.g. regex) — confirm before switching quote style.
  delimiters_to_remove: ['\t', '\n', '   ', '  ']  # list of strings