user-feedback / utils /loaders.py
Ashmi Banerjee
trial
82a36a6
raw
history blame
738 Bytes
import pandas as pd
import os
from datasets import load_dataset
from dotenv import load_dotenv
# Load environment variables via python-dotenv (typically from a local .env
# file) before the settings below are read from the process environment.
load_dotenv()

# Hugging Face settings taken from the environment; each one is None when
# the corresponding variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_NAME = os.environ.get("DATA_REPO")
DATA_FILES = os.environ.get("LLAMA_DATA_FILES")
def load_data(limit: int = 5) -> pd.DataFrame:
    """Return the first ``limit`` rows of the results data as a DataFrame.

    The local CSV ``../data/gemini_results_subset.csv`` (resolved against the
    current working directory) is tried first. If it cannot be read or
    parsed, the data is loaded from the Hugging Face dataset repository named
    by ``REPO_NAME`` (files selected by ``DATA_FILES``, revision ``main``)
    and the ``train`` split is used instead.

    Args:
        limit: Maximum number of leading rows to return. Defaults to 5,
            which matches the previous hard-coded behaviour.

    Returns:
        A pandas DataFrame holding at most ``limit`` rows.

    Raises:
        ValueError: If the local CSV is unavailable and ``REPO_NAME`` is not
            configured, so there is no fallback source to load from.
    """
    try:
        # TODO: change this to load the data from the database (buggy for debugging)
        local_df = pd.read_csv("../data/gemini_results_subset.csv")
    except (OSError, ValueError) as exc:
        # OSError covers a missing or unreadable file; ValueError covers
        # pandas parser, empty-data and decoding errors. Anything else is a
        # programming error and is deliberately left to propagate.
        print(f"could not read local data: {exc!r}")
        print("data not found, loading from huggingface dataset")
    else:
        return local_df[:limit]

    # Fallback runs outside the except block so that a failure here is not
    # reported as a consequence of the (expected) local-file error.
    if not REPO_NAME:
        raise ValueError(
            "DATA_REPO is not set; cannot load the fallback dataset "
            "from the Hugging Face Hub"
        )

    # token=True leaves credential lookup to the Hugging Face libraries
    # (the module-level HF_TOKEN value is not passed explicitly).
    dataset = load_dataset(
        REPO_NAME, token=True, data_files=DATA_FILES, revision="main"
    )
    dataset.set_format(type="pandas")  # indexing now yields pandas objects
    # Slice at the dataset level so only the requested rows are materialised.
    return dataset["train"][:limit]