File size: 3,414 Bytes
873ae70
 
ec0498e
873ae70
 
 
 
d41df51
ec0498e
 
 
 
873ae70
b1c60f6
873ae70
b1c60f6
 
873ae70
 
b1c60f6
873ae70
b1c60f6
873ae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1c60f6
 
 
873ae70
 
 
b1c60f6
 
873ae70
 
 
 
 
ec0498e
b1c60f6
 
 
 
 
 
 
 
873ae70
b1c60f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0dea963
b1c60f6
873ae70
b1c60f6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
import pandas as pd
from transformers import pipeline

# Set the page layout for Streamlit
st.set_page_config(layout="wide")


@st.cache_resource
def _load_tqa_pipeline():
    """Build the TAPAS table-question-answering pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction, so
    the pipeline is cached as a shared resource instead of being rebuilt
    (and the model reloaded) on each rerun.

    Returns:
        The ``transformers`` pipeline for the ``table-question-answering``
        task backed by ``google/tapas-large-finetuned-wtq``.
    """
    # Use the first GPU when one is visible; otherwise fall back to CPU
    # (device=-1) rather than failing on CPU-only hosts.
    try:
        import torch

        device = 0 if torch.cuda.is_available() else -1
    except ImportError:
        device = -1

    return pipeline(
        task="table-question-answering",
        model="google/tapas-large-finetuned-wtq",
        device=device,
    )


# Initialize TAPAS pipeline for table-based question answering
tqa = _load_tqa_pipeline()

# Page heading, followed by a short description of what the app offers
_INTRO_MARKDOWN = """ 
    This app allows you to upload a table (CSV or Excel) and ask questions to extract information from the data.
    Using **TAPAS**, the app can interpret your questions and provide the corresponding answers.

    ### Available Features:
    - **Table Question Answering**: Ask questions related to the uploaded table.
    
    Upload your data and ask questions to extract answers.
"""

st.title("Data Table with TAPAS NLP Integration")
st.markdown(_INTRO_MARKDOWN)

def _coerce_numeric(column):
    """Return *column* converted to a numeric dtype, or unchanged if it cannot be parsed."""
    # Explicit try/except replaces the deprecated ``errors='ignore'`` mode
    # of ``pd.to_numeric`` while keeping the same "leave as-is" outcome.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def _load_table(uploaded):
    """Read the uploaded CSV or XLSX file into a DataFrame.

    Args:
        uploaded: The object returned by the Streamlit file uploader; its
            ``name`` attribute is used to pick the reader.

    Returns:
        The loaded DataFrame with columns converted to numeric where
        possible, or ``None`` if the file type is unsupported or reading
        failed (an error is shown to the user in both cases).
    """
    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    lowered_name = uploaded.name.lower()
    try:
        if lowered_name.endswith('.csv'):
            # NOTE(review): the delimiter is fixed to ';' — comma-separated
            # files will load as a single column. Confirm this is intended.
            table = pd.read_csv(uploaded, sep=';', encoding='ISO-8859-1')
        elif lowered_name.endswith('.xlsx'):
            table = pd.read_excel(uploaded, engine='openpyxl')
        else:
            st.error("Unsupported file type")
            return None
        return table.apply(_coerce_numeric)
    except Exception as e:  # UI boundary: report any read/parse failure
        st.error(f"Error reading file: {str(e)}")
        return None


# File uploader in the sidebar
file_name = st.sidebar.file_uploader("Upload file:", type=['csv', 'xlsx'])

# File processing and question answering
if file_name is None:
    st.markdown('<p class="font">Please upload an excel or csv file </p>', unsafe_allow_html=True)
else:
    df = _load_table(file_name)

    # Only offer the question UI when a table was actually loaded; otherwise
    # there is nothing to query and the read error is already on screen.
    if df is not None:
        st.write("Original Data:")
        st.write(df)

        # Display a sample of data for user reference
        st.write("Sample data:")
        st.write(df.head())

        # User input for the question
        question = st.text_input('Ask your question related to the table')

        if st.button('Get Answer'):
            if not question or not question.strip():
                st.error("Please enter a valid question.")
            else:
                # Show the spinner only while the model is running.
                with st.spinner():
                    try:
                        # The TAPAS tokenizer requires every cell to be
                        # text, so hand it a string-typed copy of the table.
                        result = tqa(table=df.astype(str), query=question)
                    except Exception as e:
                        st.warning(f"Error processing question or generating answer: {str(e)}")
                    else:
                        # Display the raw output from TAPAS
                        st.write("TAPAS Raw Output (Response):")
                        st.write(result)

                        # Same output rendered as plain text
                        st.text("Raw TAPAS Output (Plain Text):")
                        st.text(str(result))

                        # Check if TAPAS is returning the expected answer
                        answer = result.get('answer', None)
                        if answer:
                            st.write(f"TAPAS Answer: {answer}")
                        else:
                            st.warning("TAPAS did not return a valid answer.")