File size: 2,211 Bytes
58e78d3
13ba238
 
 
 
 
250af9b
87f5105
250af9b
13ba238
 
1350777
aad2007
 
1350777
 
aad2007
 
13ba238
b2b7a66
13ba238
 
 
 
b2b7a66
 
 
 
13ba238
aad2007
 
 
 
87f5105
aad2007
b2b7a66
 
13ba238
1350777
 
 
13ba238
1350777
 
 
87f5105
1350777
 
 
87f5105
 
 
 
1350777
87f5105
 
b2b7a66
1350777
 
13ba238
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
import pandas as pd

def load_data(path="benchmark_data.csv") -> pd.DataFrame:
    """Load the benchmark results table from a CSV file.

    Args:
        path: Location of the CSV file; anything ``pandas.read_csv`` accepts.
            Defaults to ``"benchmark_data.csv"``, resolved relative to the
            current working directory, so existing zero-argument calls behave
            exactly as before.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    return pd.read_csv(path)

def case_insensitive_search(data, query, column):
    """Return the rows of *data* whose *column* contains *query*, ignoring case.

    The query is matched as a literal substring, not as a regular expression,
    so user input containing characters such as ``(``, ``+`` or ``.`` is safe
    and matches only itself.

    Args:
        data: DataFrame to filter.
        query: Substring to look for. An empty (falsy) query disables
            filtering and *data* is returned unchanged.
        column: Name of the column to search.

    Returns:
        A DataFrame holding only the matching rows, or *data* itself when
        *query* is empty. Rows whose value in *column* is missing never match.

    Raises:
        KeyError: If *column* is not a column of *data*.
    """
    if not query:
        return data

    values = data[column]
    # Cast to text first so that non-string columns (e.g. numeric ones) can be
    # searched too; the ``.str`` accessor raises on non-text data.
    haystack = values.astype(str).str.lower()
    # regex=False: treat the query literally instead of compiling it.
    # na=False: guarantee a purely boolean mask.
    mask = haystack.str.contains(query.lower(), regex=False, na=False)
    # The text cast may render missing values as strings such as "nan";
    # exclude those rows explicitly so they can never match a query.
    return data[mask & values.notna()]

def display_table(data, rows_per_page=10, height=None):
    """Render *data* through ``st.dataframe`` inside a new Streamlit container.

    Args:
        data: The table to show.
        rows_per_page: Used only to derive a default height when *height* is
            not given; it does not paginate the data.
        height: Value forwarded as ``st.dataframe``'s ``height``. When ``None``
            it is computed as ``40 + rows_per_page * 38``, capped at 800.
    """
    with st.container():
        table_height = (
            height if height is not None else min(40 + rows_per_page * 38, 800)
        )
        st.dataframe(data, height=table_height)

def main():
    """Build the Multihop-RAG benchmark page.

    Loads the benchmark table, narrows it with the four sidebar text filters
    (each a case-insensitive search on one column), shows the result as three
    tables (settings, retrieval metrics, response metrics), and finishes with
    citation and contact information.
    """
    # Single source for the citation so the sidebar and the footer cannot
    # drift apart; carries the full arXiv URL rather than a relative path.
    citation = (
        "Tang, Yixuan, and Yi Yang. MultiHop-RAG: Benchmarking "
        "Retrieval-Augmented Generation for Multi-Hop Queries. "
        "ArXiv, 2024, https://arxiv.org/abs/2401.15391."
    )
    # (sidebar label, DataFrame column) for every search box, in display order.
    filters = (
        ("Chat Model", 'chat_model'),
        ("Embedding Model", 'embedding_model'),
        ("Chunk", 'chunk'),
        ("Framework", 'framework'),
    )
    # (section heading, columns shown) for every table, in display order.
    sections = (
        ("Settings", ['framework', 'chat_model', 'embedding_model', 'chunk']),
        ("Retrieval Metrics", ['MRR@10', 'Hit@10']),
        ("Response Metrics", ['Accuracy']),
    )

    st.title("Multihop-RAG Benchmark 💡")

    data = load_data()

    st.sidebar.header("Search Options")
    # Create all input widgets first, then apply the filters cumulatively.
    queries = [
        (st.sidebar.text_input(label), column) for label, column in filters
    ]
    for query, column in queries:
        if query:
            data = case_insensitive_search(data, query, column)

    for heading, columns in sections:
        st.header(heading)
        display_table(data[columns])

    st.sidebar.header("Citation")
    st.sidebar.info("Please cite this dataset as:\n" + citation)
    st.markdown("---")
    st.caption(f"For citation, please use: '{citation}'")
    st.markdown("---")
    # NOTE(review): the address below looks like an obfuscation placeholder
    # rather than a real e-mail address - confirm and restore the intended one.
    st.caption("For results self-reporting, please send an email to [email protected]")

# Build the page only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()