File size: 1,792 Bytes
f51bb92 d1afae8 f51bb92 ce9ef3e f51bb92 f2daaee 9a7da99 f51bb92 6158da4 b83cc65 f0018f2 f51bb92 e5cd1d3 6158da4 6d056d5 db6b619 6d056d5 6158da4 6d056d5 f2daaee d1afae8 dd677c3 f2daaee 7f989d6 f2daaee f51bb92 6158da4 b83cc65 6158da4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# Log locations. Both values are strings holding relative paths.
log_dir: '../storage/logs'
log_chunk_dir: '../storage/logs/chunks'
# Compute device (str). Allowed values: cuda, cpu.
device: 'cuda'
# Vector store settings. Child keys are indented so they belong to
# `vectorstore` instead of colliding with same-named keys (e.g. `model`)
# in other sections.
vectorstore:
  embedd_files: false  # bool
  data_path: '../storage/data'  # str
  url_file_path: '../storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'FAISS'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: '../vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  # NOTE(review): the two *_params maps are assumed to be children of
  # `vectorstore`; confirm against the code that reads this config.
  faiss_params:  # Not used as of now
    index_path: '../vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int

  colbert_params:
    index_name: 'new_idx'  # str
# LLM settings. `llm_loader` names one of the two loader options listed in
# its comment; `openai_params` and `local_llm_params` are separate sub-maps,
# so each can carry its own `model` key without clashing.
llm_params:
  use_history: true  # bool
  memory_window: 3  # int
  llm_loader: 'openai'  # str [local_llm, openai]

  openai_params:
    model: 'gpt-3.5-turbo-1106'  # str [gpt-3.5-turbo-1106, gpt-4]

  # NOTE(review): temperature, repo_id and filename are assumed to belong to
  # the local model settings; confirm against the code that reads them.
  local_llm_params:
    model: 'tiny-llama'
    temperature: 0.7
    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'  # HuggingFace repo id
    # Specific name of gguf file in the repo
    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'

  # NOTE(review): assumed to be a direct child of `llm_params` rather than of
  # `local_llm_params` or the document root; confirm against the consumer.
  pdf_reader: 'llama'  # str [llama, pymupdf, gpt]
# Chat logging settings: an on/off flag and the name of the logging platform.
chat_logging:
  log_chat: false  # bool
  platform: 'literalai'  # str
# Text splitter settings: feature flags, chunk sizing, and the separator and
# delimiter lists.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # list of strings; double-quoted, so "\n" is a real newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # list of strings
  # NOTE(review): these are single-quoted, so '\t' and '\n' are the literal
  # two-character sequences backslash+t / backslash+n, not tab/newline. Switch
  # to double quotes if real control characters are intended. The last two
  # entries look identical here; they may originally have been runs of
  # several spaces. Verify against the authoritative copy of this file.
  delimiters_to_remove: ['\t', '\n', ' ', ' ']