"""Load a small data sample from a local CSV, falling back to a Hugging Face dataset."""

import os
from typing import Optional

import pandas as pd
from datasets import load_dataset
from dotenv import load_dotenv

# Populate os.environ from a local .env file (if any) before reading settings.
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
REPO_NAME = os.getenv("DATA_REPO")
DATA_FILES = os.getenv("LLAMA_DATA_FILES")

# NOTE: relative path, so it is resolved against the current working
# directory of the process, not against this file's location.
LOCAL_CSV_PATH = "../data/gemini_results_subset.csv"


def load_data(limit: Optional[int] = 5) -> pd.DataFrame:
    """Return the first ``limit`` rows of the data as a pandas DataFrame.

    The local CSV at ``LOCAL_CSV_PATH`` is tried first. If it cannot be read
    or parsed, the ``train`` split of the Hugging Face dataset ``REPO_NAME``
    (restricted to ``DATA_FILES``, revision ``main``) is loaded instead.

    Args:
        limit: Maximum number of leading rows to return. Defaults to 5, the
            original hard-coded value. ``None`` returns every row.

    Returns:
        A DataFrame holding at most ``limit`` rows.

    Raises:
        ValueError: If the local CSV is unavailable and ``DATA_REPO`` is not
            set, so there is no fallback dataset to load.
    """
    # TODO: change this to load the data from the database
    # (the local CSV is only a debugging shortcut).
    try:
        data = pd.read_csv(LOCAL_CSV_PATH)
    except (OSError, ValueError) as exc:
        # OSError covers a missing/unreadable file; pandas' EmptyDataError and
        # ParserError (and UnicodeDecodeError) are ValueError subclasses.
        # Anything else is a genuine bug and is allowed to propagate.
        print(f"data not found, loading from huggingface dataset ({exc})")
    else:
        return data[:limit]

    if not REPO_NAME:
        raise ValueError(
            "DATA_REPO is not set; cannot load the fallback dataset "
            "from the Hugging Face Hub"
        )

    dataset = load_dataset(
        REPO_NAME,
        # Prefer the explicitly configured token; fall back to the locally
        # stored Hugging Face credentials when HF_TOKEN is not set.
        token=HF_TOKEN or True,
        data_files=DATA_FILES,
        revision="main",
    )
    # Make indexing return pandas objects instead of Python dicts.
    dataset.set_format(type="pandas")
    df = dataset["train"][:]
    return df[:limit]