fifth commit
- README.md +26 -1
- Screenshots.md +27 -0
- app/database.py +162 -1
- app/evaluation.py +134 -21
- app/generate_ground_truth.py +113 -38
- app/main.py +88 -42
- app/rag_evaluation.py +7 -5
- data/evaluation_results.csv +181 -0
- data/ground-truth-retrieval.csv +25 -0
- data/sqlite.db +0 -0
- docker-compose.yaml +11 -2
- grafana/dashboards/rag_evaluation.json +172 -0
- grafana/provisioning/dashboards/dashboards.yaml +14 -0
- grafana/provisioning/dashboards/rag_evaluation.json +0 -129
- grafana/provisioning/datasources/sqlite.yaml +18 -3
- image-1.png +0 -0
- image-10.png +0 -0
- image-11.png +0 -0
- image-2.png +0 -0
- image-3.png +0 -0
- image-4.png +0 -0
- image-5.png +0 -0
- image-6.png +0 -0
- image-7.png +0 -0
- image-8.png +0 -0
- image-9.png +0 -0
- image.png +0 -0
- run-docker-compose.sh +43 -16
README.md
CHANGED
@@ -100,7 +100,32 @@ youtube-rag-app/

 ## Getting Started

 git clone git@github.com:ganesh3/rag-youtube-assistant.git
-
+cd rag-youtube-assistant
+docker-compose build app
+docker-compose up -d
+
+You need to have Docker Desktop installed on your laptop/workstation, along with WSL2 on Windows machines.

 ## License
 GPL v3
+
+### Interface
+
+I use Streamlit to ingest the YouTube transcripts, query them with the LLM & RAG pipeline, generate ground truth, and evaluate it.
+
+### Ingestion
+
+I ingest YouTube transcripts using the YouTube Data API v3 and the YouTube Transcript package; the code lives in transcript_extractor.py and runs in the Streamlit app via main.py.
+
+### Retrieval
+
+Retrieval evaluation results: "hit_rate": 1, "mrr": 1
+
+### RAG Flow
+
+I used the LLM-as-a-Judge metric to evaluate the quality of the RAG flow on my local machine (CPU only), so the total number of records evaluated is small (12).
+
+* RELEVANT - 12 (100%)
+* PARTLY_RELEVANT - 0 (0%)
+* NON_RELEVANT - 0 (0%)
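For reference, the two retrieval metrics quoted above come from per-query relevance flags. Below is a minimal, self-contained sketch of how hit rate and MRR are computed, mirroring the hit_rate and mrr helpers this commit adds to app/evaluation.py; the relevance data here is made up for illustration.

```python
# Each inner list is one query's ranked results, True where the retrieved
# document belongs to the query's source video.
relevance_total = [
    [False, True, False],   # first relevant hit at rank 2 -> reciprocal rank 0.5
    [True, False, False],   # first relevant hit at rank 1 -> reciprocal rank 1.0
    [False, False, False],  # no relevant hit              -> contributes 0
]

def hit_rate(relevance_total):
    # Fraction of queries with at least one relevant result.
    return sum(any(line) for line in relevance_total) / len(relevance_total)

def mrr(relevance_total):
    # Mean reciprocal rank of the first relevant result per query.
    scores = []
    for line in relevance_total:
        for rank, relevant in enumerate(line, 1):
            if relevant:
                scores.append(1 / rank)
                break
        else:
            scores.append(0)
    return sum(scores) / len(scores)

print(hit_rate(relevance_total))  # 2/3 ~= 0.667
print(mrr(relevance_total))       # (0.5 + 1.0 + 0) / 3 = 0.5
```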
Screenshots.md
ADDED
@@ -0,0 +1,27 @@
+### Docker deployment
+
+
+
+### Ingestion
+
+
+
+
+### RAG
+
+
+
+
+
+
+
+### Ground Truth Generation
+
+
+
+
+### RAG Evaluation
+
+
+
+
app/database.py
CHANGED
@@ -11,6 +11,7 @@ class DatabaseHandler:
     def create_tables(self):
         with sqlite3.connect(self.db_path) as conn:
             cursor = conn.cursor()
+            # Existing tables
             cursor.execute('''
                 CREATE TABLE IF NOT EXISTS videos (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -53,6 +54,50 @@ class DatabaseHandler:
                     FOREIGN KEY (embedding_model_id) REFERENCES embedding_models (id)
                 )
             ''')
+
+            # New tables for ground truth and evaluation
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS ground_truth (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    video_id TEXT,
+                    question TEXT,
+                    generation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    UNIQUE(video_id, question)
+                )
+            ''')
+
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS search_performance (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    video_id TEXT,
+                    hit_rate REAL,
+                    mrr REAL,
+                    evaluation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS search_parameters (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    video_id TEXT,
+                    parameter_name TEXT,
+                    parameter_value REAL,
+                    score REAL,
+                    evaluation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS rag_evaluations (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    video_id TEXT,
+                    question TEXT,
+                    answer TEXT,
+                    relevance TEXT,
+                    explanation TEXT,
+                    evaluation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
             conn.commit()

     def update_schema(self):
@@ -186,4 +231,120 @@ class DatabaseHandler:
         #         SET transcript_content = ?
         #         WHERE youtube_id = ?
         #     ''', (transcript_content, youtube_id))
-        #     conn.commit()
+        #     conn.commit()
+
+    def add_ground_truth_questions(self, video_id, questions):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            for question in questions:
+                try:
+                    cursor.execute('''
+                        INSERT OR IGNORE INTO ground_truth (video_id, question)
+                        VALUES (?, ?)
+                    ''', (video_id, question))
+                except sqlite3.IntegrityError:
+                    continue  # Skip duplicate questions
+            conn.commit()
+
+    def get_ground_truth_by_video(self, video_id):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT gt.*, v.channel_name
+                FROM ground_truth gt
+                JOIN videos v ON gt.video_id = v.youtube_id
+                WHERE gt.video_id = ?
+                ORDER BY gt.generation_date DESC
+            ''', (video_id,))
+            return cursor.fetchall()
+
+    def get_ground_truth_by_channel(self, channel_name):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT gt.*, v.channel_name
+                FROM ground_truth gt
+                JOIN videos v ON gt.video_id = v.youtube_id
+                WHERE v.channel_name = ?
+                ORDER BY gt.generation_date DESC
+            ''', (channel_name,))
+            return cursor.fetchall()
+
+    def get_all_ground_truth(self):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT gt.*, v.channel_name
+                FROM ground_truth gt
+                JOIN videos v ON gt.video_id = v.youtube_id
+                ORDER BY gt.generation_date DESC
+            ''')
+            return cursor.fetchall()
+
+    def save_search_performance(self, video_id, hit_rate, mrr):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO search_performance (video_id, hit_rate, mrr)
+                VALUES (?, ?, ?)
+            ''', (video_id, hit_rate, mrr))
+            conn.commit()
+
+    def save_search_parameters(self, video_id, parameters, score):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            for param_name, param_value in parameters.items():
+                cursor.execute('''
+                    INSERT INTO search_parameters (video_id, parameter_name, parameter_value, score)
+                    VALUES (?, ?, ?, ?)
+                ''', (video_id, param_name, param_value, score))
+            conn.commit()
+
+    def save_rag_evaluation(self, evaluation_data):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO rag_evaluations
+                (video_id, question, answer, relevance, explanation)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (
+                evaluation_data['video_id'],
+                evaluation_data['question'],
+                evaluation_data['answer'],
+                evaluation_data['relevance'],
+                evaluation_data['explanation']
+            ))
+            conn.commit()
+
+    def get_latest_evaluation_results(self, video_id=None):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            if video_id:
+                cursor.execute('''
+                    SELECT * FROM rag_evaluations
+                    WHERE video_id = ?
+                    ORDER BY evaluation_date DESC
+                ''', (video_id,))
+            else:
+                cursor.execute('''
+                    SELECT * FROM rag_evaluations
+                    ORDER BY evaluation_date DESC
+                ''')
+            return cursor.fetchall()
+
+    def get_latest_search_performance(self, video_id=None):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            if video_id:
+                cursor.execute('''
+                    SELECT * FROM search_performance
+                    WHERE video_id = ?
+                    ORDER BY evaluation_date DESC
+                    LIMIT 1
+                ''', (video_id,))
+            else:
+                cursor.execute('''
+                    SELECT * FROM search_performance
+                    ORDER BY evaluation_date DESC
+                ''')
+            return cursor.fetchall()
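As a rough usage illustration only (not part of the commit), the new ground-truth helpers above could be exercised as follows. The DatabaseHandler constructor is assumed to take the SQLite file path, and the video ID and questions are sample values.

```python
from database import DatabaseHandler

# Assumption: the constructor accepts the SQLite path used elsewhere in the app.
db = DatabaseHandler('data/sqlite.db')
db.create_tables()

# INSERT OR IGNORE plus UNIQUE(video_id, question) makes re-runs idempotent.
db.add_ground_truth_questions('zjkBMFhNj_g', [
    'What is prompt injection?',
    'How can trigger phrases poison an LLM?',
])

# Rows come back newest first, joined with the channel name from the videos table.
for row in db.get_ground_truth_by_video('zjkBMFhNj_g'):
    print(row)

# Persist retrieval metrics for the same video.
db.save_search_performance('zjkBMFhNj_g', hit_rate=1.0, mrr=1.0)
```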
app/evaluation.py
CHANGED
@@ -3,6 +3,10 @@ import numpy as np
 import pandas as pd
 import json
 import ollama
+import requests
+import sqlite3
+from tqdm import tqdm
+import csv

 class EvaluationSystem:
     def __init__(self, data_processor, database_handler):
@@ -42,7 +46,7 @@ class EvaluationSystem:

             relevance_scores.append(self.relevance_scoring(query, retrieved_docs))
             similarity_scores.append(self.answer_similarity(generated_answer, reference))
-            human_scores.append(self.human_evaluation(index_name, query))
+            human_scores.append(self.human_evaluation(index_name, query))

         return {
             "avg_relevance_score": np.mean(relevance_scores),
@@ -64,17 +68,16 @@ class EvaluationSystem:
             print(f"Error in LLM evaluation: {str(e)}")
             return None

-    def evaluate_rag(self, rag_system, ground_truth_file,
+    def evaluate_rag(self, rag_system, ground_truth_file, prompt_template=None):
         try:
             ground_truth = pd.read_csv(ground_truth_file)
         except FileNotFoundError:
             print("Ground truth file not found. Please generate ground truth data first.")
             return None

-        sample = ground_truth.sample(n=min(sample_size, len(ground_truth)), random_state=1)
         evaluations = []

-        for _, row in
+        for _, row in tqdm(ground_truth.iterrows(), total=len(ground_truth)):
             question = row['question']
             video_id = row['video_id']

@@ -93,22 +96,132 @@ class EvaluationSystem:
             if prompt_template:
                 evaluation = self.llm_as_judge(question, answer_llm, prompt_template)
                 if evaluation:
-                    evaluations.append(
-                        str(video_id),
-                        str(question),
-                        str(answer_llm),
-                        str(evaluation.get('Relevance', 'UNKNOWN')),
-                        str(evaluation.get('Explanation', 'No explanation provided'))
-                    )
+                    evaluations.append({
+                        'video_id': str(video_id),
+                        'question': str(question),
+                        'answer': str(answer_llm),
+                        'relevance': str(evaluation.get('Relevance', 'UNKNOWN')),
+                        'explanation': str(evaluation.get('Explanation', 'No explanation provided'))
+                    })
             else:
-                # Fallback to cosine similarity if no prompt template is provided
                 similarity = self.answer_similarity(answer_llm, row.get('reference_answer', ''))
-                evaluations.append(
-                    str(video_id),
-                    str(question),
-                    str(answer_llm),
-                    f"Similarity: {similarity}",
-                    "Cosine similarity used for evaluation"
-                )
+                evaluations.append({
+                    'video_id': str(video_id),
+                    'question': str(question),
+                    'answer': str(answer_llm),
+                    'relevance': f"Similarity: {similarity}",
+                    'explanation': "Cosine similarity used for evaluation"
+                })
+
+        # Save evaluations to CSV
+        csv_path = 'data/evaluation_results.csv'
+        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
+            fieldnames = ['video_id', 'question', 'answer', 'relevance', 'explanation']
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            for eval_data in evaluations:
+                writer.writerow(eval_data)
+
+        print(f"Evaluation results saved to {csv_path}")
+
+        # Save evaluations to database
+        self.save_evaluations_to_db(evaluations)
+
+        return evaluations
+
+    def save_evaluations_to_db(self, evaluations):
+        with sqlite3.connect(self.db_handler.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS rag_evaluations (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    video_id TEXT,
+                    question TEXT,
+                    answer TEXT,
+                    relevance TEXT,
+                    explanation TEXT
+                )
+            ''')
+            for eval_data in evaluations:
+                cursor.execute('''
+                    INSERT INTO rag_evaluations (video_id, question, answer, relevance, explanation)
+                    VALUES (?, ?, ?, ?, ?)
+                ''', (eval_data['video_id'], eval_data['question'], eval_data['answer'],
+                      eval_data['relevance'], eval_data['explanation']))
+            conn.commit()
+        print("Evaluation results saved to database")
+
+    def run_full_evaluation(self, rag_system, ground_truth_file, prompt_template=None):
+        # Load ground truth
+        ground_truth = pd.read_csv(ground_truth_file)
+
+        # Evaluate RAG
+        rag_evaluations = self.evaluate_rag(rag_system, ground_truth_file, prompt_template)
+
+        # Evaluate search performance
+        def search_function(query, video_id):
+            index_name = self.db_handler.get_elasticsearch_index_by_youtube_id(video_id)
+            if index_name:
+                return rag_system.data_processor.search(query, num_results=10, method='hybrid', index_name=index_name)
+            return []
+
+        search_performance = self.evaluate_search(ground_truth, search_function)
+
+        # Optimize search parameters
+        param_ranges = {'content': (0.0, 3.0)}  # Example parameter range
+
+        def objective_function(params):
+            def parameterized_search(query, video_id):
+                index_name = self.db_handler.get_elasticsearch_index_by_youtube_id(video_id)
+                if index_name:
+                    return rag_system.data_processor.search(query, num_results=10, method='hybrid', index_name=index_name, boost_dict=params)
+                return []
+            return self.evaluate_search(ground_truth, parameterized_search)['mrr']

+        best_params, best_score = self.simple_optimize(param_ranges, objective_function)
+
+        return {
+            "rag_evaluations": rag_evaluations,
+            "search_performance": search_performance,
+            "best_params": best_params,
+            "best_score": best_score
+        }
+
+    def hit_rate(self, relevance_total):
+        return sum(any(line) for line in relevance_total) / len(relevance_total)
+
+    def mrr(self, relevance_total):
+        scores = []
+        for line in relevance_total:
+            for rank, relevant in enumerate(line, 1):
+                if relevant:
+                    scores.append(1 / rank)
+                    break
+            else:
+                scores.append(0)
+        return sum(scores) / len(scores)
+
+    def simple_optimize(self, param_ranges, objective_function, n_iterations=10):
+        best_params = None
+        best_score = float('-inf')
+        for _ in range(n_iterations):
+            current_params = {param: np.random.uniform(min_val, max_val)
+                              for param, (min_val, max_val) in param_ranges.items()}
+            current_score = objective_function(current_params)
+            if current_score > best_score:
+                best_score = current_score
+                best_params = current_params
+        return best_params, best_score
+
+    def evaluate_search(self, ground_truth, search_function):
+        relevance_total = []
+        for _, row in tqdm(ground_truth.iterrows(), total=len(ground_truth)):
+            video_id = row['video_id']
+            results = search_function(row['question'], video_id)
+            relevance = [d['video_id'] == video_id for d in results]
+            relevance_total.append(relevance)
+        return {
+            'hit_rate': self.hit_rate(relevance_total),
+            'mrr': self.mrr(relevance_total),
+        }
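The parameter tuning added in run_full_evaluation above is a plain random search over boost ranges. Here is a standalone toy run of the same idea, with a made-up objective standing in for the Elasticsearch-backed MRR score:

```python
import numpy as np

def simple_optimize(param_ranges, objective_function, n_iterations=10):
    # Sample each parameter uniformly in its range and keep the best-scoring set.
    best_params, best_score = None, float('-inf')
    for _ in range(n_iterations):
        current_params = {param: np.random.uniform(lo, hi)
                          for param, (lo, hi) in param_ranges.items()}
        current_score = objective_function(current_params)
        if current_score > best_score:
            best_params, best_score = current_params, current_score
    return best_params, best_score

# Toy objective in place of evaluate_search(...)['mrr']; it peaks at a content boost of 1.5.
def fake_mrr(params):
    return 1.0 - abs(params['content'] - 1.5) / 3.0

best_params, best_score = simple_optimize({'content': (0.0, 3.0)}, fake_mrr, n_iterations=50)
print(best_params, round(best_score, 3))
```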
app/generate_ground_truth.py
CHANGED
@@ -46,13 +46,13 @@ def get_transcript_from_sqlite(db_path, video_id):
         logger.error(f"Error retrieving transcript from SQLite: {str(e)}")
         return None

-def generate_questions(transcript):
+def generate_questions(transcript, max_retries=3):
     prompt_template = """
     You are an AI assistant tasked with generating questions based on a YouTube video transcript.
-    Formulate
+    Formulate EXACTLY 10 questions that a user might ask based on the provided transcript.
     Make the questions specific to the content of the transcript.
     The questions should be complete and not too short. Use as few words as possible from the transcript.
-
+    Ensure that all 10 questions are unique and not repetitive.

     The transcript:

@@ -63,60 +63,121 @@ def generate_questions(transcript):
     {{"questions": ["question1", "question2", ..., "question10"]}}
     """.strip()

+    all_questions = set()
+    retries = 0
+
+    while len(all_questions) < 10 and retries < max_retries:
+        prompt = prompt_template.format(transcript=transcript)
+        try:
+            response = ollama.chat(
+                model='phi3.5',
+                messages=[{"role": "user", "content": prompt}]
+            )
+            questions = json.loads(response['message']['content'])['questions']
+            all_questions.update(questions)
+        except Exception as e:
+            logger.error(f"Error generating questions: {str(e)}")
+        retries += 1
+
+    if len(all_questions) < 10:
+        logger.warning(f"Could only generate {len(all_questions)} unique questions after {max_retries} attempts.")
+
+    return {"questions": list(all_questions)[:10]}

 def generate_ground_truth(db_handler, data_processor, video_id):
     es = Elasticsearch([f'http://{os.getenv("ELASTICSEARCH_HOST", "localhost")}:{os.getenv("ELASTICSEARCH_PORT", "9200")}'])

-    # Get
-    if not index_name:
-        logger.error(f"No Elasticsearch index found for video {video_id}")
-        return None
-
-    # Extract the model name from the index name
-    model_name = extract_model_name(index_name)
-
-    if not model_name:
-        logger.error(f"Could not extract model name from index name: {index_name}")
-        return None
+    # Get existing questions for this video to avoid duplicates
+    existing_questions = set(q[1] for q in db_handler.get_ground_truth_by_video(video_id))

     transcript = None
+    index_name = db_handler.get_elasticsearch_index_by_youtube_id(video_id)
+
     if index_name:
         transcript = get_transcript_from_elasticsearch(es, index_name, video_id)
-        logger.info(f"Transcript to generate questions using elasticsearch is {transcript}")

     if not transcript:
         transcript = db_handler.get_transcript_content(video_id)
-        logger.info(f"Transcript to generate questions using textual data is {transcript}")

     if not transcript:
         logger.error(f"Failed to retrieve transcript for video {video_id}")
         return None

+    # Generate questions until we have 10 unique ones
+    all_questions = set()
+    max_attempts = 3
+    attempts = 0
+
+    while len(all_questions) < 10 and attempts < max_attempts:
+        questions = generate_questions(transcript)
+        if questions and 'questions' in questions:
+            new_questions = set(questions['questions']) - existing_questions
+            all_questions.update(new_questions)
+        attempts += 1
+
+    if not all_questions:
+        logger.error("Failed to generate any unique questions.")
+        return None
+
+    # Store questions in database
+    db_handler.add_ground_truth_questions(video_id, all_questions)
+
+    # Create DataFrame and save to CSV
+    df = pd.DataFrame([(video_id, q) for q in all_questions], columns=['video_id', 'question'])
+    csv_path = 'data/ground-truth-retrieval.csv'

+    # Append to existing CSV if it exists, otherwise create new
+    if os.path.exists(csv_path):
+        df.to_csv(csv_path, mode='a', header=False, index=False)
+    else:
         df.to_csv(csv_path, index=False)
+
+    logger.info(f"Ground truth data saved to {csv_path}")
+    return df
+
+def get_ground_truth_display_data(db_handler, video_id=None, channel_name=None):
+    """Get ground truth data from both database and CSV file"""
+    import pandas as pd
+
+    # Try to get data from database first
+    if video_id:
+        data = db_handler.get_ground_truth_by_video(video_id)
+    elif channel_name:
+        data = db_handler.get_ground_truth_by_channel(channel_name)
     else:
+        data = []
+
+    # Create DataFrame from database data
+    if data:
+        db_df = pd.DataFrame(data, columns=['id', 'video_id', 'question', 'generation_date', 'channel_name'])
+    else:
+        db_df = pd.DataFrame()
+
+    # Try to get data from CSV
+    try:
+        csv_df = pd.read_csv('data/ground-truth-retrieval.csv')
+        if video_id:
+            csv_df = csv_df[csv_df['video_id'] == video_id]
+        elif channel_name:
+            # Join with videos table to get channel information
+            videos_df = pd.DataFrame(db_handler.get_all_videos(),
+                                     columns=['youtube_id', 'title', 'channel_name', 'upload_date'])
+            csv_df = csv_df.merge(videos_df, left_on='video_id', right_on='youtube_id')
+            csv_df = csv_df[csv_df['channel_name'] == channel_name]
+    except FileNotFoundError:
+        csv_df = pd.DataFrame()
+
+    # Combine data from both sources
+    if not db_df.empty and not csv_df.empty:
+        combined_df = pd.concat([db_df, csv_df]).drop_duplicates(subset=['video_id', 'question'])
+    elif not db_df.empty:
+        combined_df = db_df
+    elif not csv_df.empty:
+        combined_df = csv_df
+    else:
+        combined_df = pd.DataFrame()
+
+    return combined_df

 def generate_ground_truth_for_all_videos(db_handler, data_processor):
     videos = db_handler.get_all_videos()
@@ -136,4 +197,18 @@ def generate_ground_truth_for_all_videos(db_handler, data_processor):
         return df
     else:
         logger.error("Failed to generate questions for any video.")
-        return None
+        return None
+
+def get_evaluation_display_data(video_id=None):
+    """Get evaluation data from both database and CSV file"""
+    import pandas as pd
+
+    # Try to get data from CSV
+    try:
+        csv_df = pd.read_csv('data/evaluation_results.csv')
+        if video_id:
+            csv_df = csv_df[csv_df['video_id'] == video_id]
+    except FileNotFoundError:
+        csv_df = pd.DataFrame()
+
+    return csv_df
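generate_questions above depends on the model returning strictly parsable JSON of the form {"questions": [...]}. A small isolated sketch of that parse-and-deduplicate step; the raw_reply string is a stand-in for response['message']['content'] from ollama.chat, and the example questions are hypothetical.

```python
import json

# Stand-in for the phi3.5 reply requested by the prompt template above.
raw_reply = '{"questions": ["What is prompt injection?", "How do trigger phrases poison an LLM?"]}'

all_questions = set()
try:
    # Anything that is not the expected JSON shape raises and would be retried.
    all_questions.update(json.loads(raw_reply)['questions'])
except (json.JSONDecodeError, KeyError) as e:
    print(f"Error generating questions: {e}")

# Drop questions already stored for this video before writing to the database/CSV.
existing_questions = {'What is prompt injection?'}
new_questions = all_questions - existing_questions
print(new_questions)
```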
app/main.py
CHANGED
@@ -6,7 +6,7 @@ from database import DatabaseHandler
 from rag import RAGSystem
 from query_rewriter import QueryRewriter
 from evaluation import EvaluationSystem
-from generate_ground_truth import generate_ground_truth, generate_ground_truth_for_all_videos
+from generate_ground_truth import generate_ground_truth, generate_ground_truth_for_all_videos, get_ground_truth_display_data, get_evaluation_display_data
 from sentence_transformers import SentenceTransformer
 import os
 import sys
@@ -311,38 +311,46 @@ def main():
         else:
             video_df = pd.DataFrame(videos, columns=['youtube_id', 'title', 'channel_name', 'upload_date'])

+            # Add channel filter
+            channels = sorted(video_df['channel_name'].unique())
+            selected_channel = st.selectbox("Filter by Channel", ["All"] + channels, key="gt_channel_select")
+
+            if selected_channel != "All":
+                video_df = video_df[video_df['channel_name'] == selected_channel]
+                # Display existing ground truth for selected channel
+                gt_data = get_ground_truth_display_data(db_handler, channel_name=selected_channel)
+                if not gt_data.empty:
+                    st.subheader("Existing Ground Truth Questions for Channel")
+                    st.dataframe(gt_data)
+
+                    # Add download button for channel ground truth
+                    csv = gt_data.to_csv(index=False)
+                    st.download_button(
+                        label="Download Channel Ground Truth CSV",
+                        data=csv,
+                        file_name=f"ground_truth_{selected_channel}.csv",
+                        mime="text/csv",
+                    )
+
             st.dataframe(video_df)
             selected_video_id = st.selectbox("Select a Video", video_df['youtube_id'].tolist(),
+                                             format_func=lambda x: video_df[video_df['youtube_id'] == x]['title'].iloc[0],
+                                             key="gt_video_select")

+            # Display existing ground truth for selected video
+            gt_data = get_ground_truth_display_data(db_handler, video_id=selected_video_id)
+            if not gt_data.empty:
+                st.subheader("Existing Ground Truth Questions")
+                st.dataframe(gt_data)
+
+                # Add download button for video ground truth
+                csv = gt_data.to_csv(index=False)
+                st.download_button(
+                    label="Download Video Ground Truth CSV",
+                    data=csv,
+                    file_name=f"ground_truth_{selected_video_id}.csv",
+                    mime="text/csv",
+                )
-            with st.spinner("Processing videos and generating ground truth..."):
-                for video_id in video_df['youtube_id']:
-                    ensure_video_processed(db_handler, data_processor, video_id, embedding_model)
-                ground_truth_df = generate_ground_truth_for_all_videos(db_handler, data_processor)
-                if ground_truth_df is not None:
-                    st.dataframe(ground_truth_df)
-                    csv = ground_truth_df.to_csv(index=False)
-                    st.download_button(
-                        label="Download Ground Truth CSV (All Videos)",
-                        data=csv,
-                        file_name="ground_truth_all_videos.csv",
-                        mime="text/csv",
-                    )

     with tab3:
         st.header("RAG Evaluation")
@@ -350,22 +358,60 @@ def main():
         try:
             ground_truth_df = pd.read_csv('data/ground-truth-retrieval.csv')
             ground_truth_available = True
+
+            # Display existing evaluations
+            existing_evaluations = get_evaluation_display_data()
+            if not existing_evaluations.empty:
+                st.subheader("Existing Evaluation Results")
+                st.dataframe(existing_evaluations)
+
+                # Add download button for evaluation results
+                csv = existing_evaluations.to_csv(index=False)
+                st.download_button(
+                    label="Download Evaluation Results CSV",
+                    data=csv,
+                    file_name="evaluation_results.csv",
+                    mime="text/csv",
+                )
+
         except FileNotFoundError:
             ground_truth_available = False

         if ground_truth_available:
+            if st.button("Run Full Evaluation"):
+                with st.spinner("Running full evaluation..."):
+                    evaluation_results = evaluation_system.run_full_evaluation(rag_system, 'data/ground-truth-retrieval.csv', prompt_template)
+
+                    st.subheader("RAG Evaluations")
+                    rag_eval_df = pd.DataFrame(evaluation_results["rag_evaluations"])
+                    st.dataframe(rag_eval_df)
+
+                    st.subheader("Search Performance")
+                    search_perf_df = pd.DataFrame([evaluation_results["search_performance"]])
+                    st.dataframe(search_perf_df)
+
+                    st.subheader("Optimized Search Parameters")
+                    params_df = pd.DataFrame([{
+                        'parameter': k,
+                        'value': v,
+                        'score': evaluation_results['best_score']
+                    } for k, v in evaluation_results['best_params'].items()])
+                    st.dataframe(params_df)
+
+                    # Save to database
+                    for video_id in rag_eval_df['video_id'].unique():
+                        db_handler.save_search_performance(
+                            video_id,
+                            evaluation_results["search_performance"]['hit_rate'],
+                            evaluation_results["search_performance"]['mrr']
+                        )
+                        db_handler.save_search_parameters(
+                            video_id,
+                            evaluation_results['best_params'],
+                            evaluation_results['best_score']
+                        )
+
+                    st.success("Evaluation complete. Results saved to database and CSV.")
         else:
             st.warning("No ground truth data available. Please generate ground truth data first.")
             st.button("Run Evaluation", disabled=True)
app/rag_evaluation.py
CHANGED
@@ -1,3 +1,4 @@
+"""
 import pandas as pd
 import numpy as np
 from tqdm import tqdm
@@ -42,7 +43,7 @@ def search(query):
     )
     return results

-prompt_template =
+prompt_template = '''
 You're an AI assistant for YouTube video transcripts. Answer the QUESTION based on the CONTEXT from our transcript database.
 Use only the facts from the CONTEXT when answering the QUESTION.

@@ -50,7 +51,7 @@ QUESTION: {question}

 CONTEXT:
 {context}
-
+'''.strip()

 def build_prompt(query, search_results):
     context = "\n\n".join([f"Segment {i+1}: {result['content']}" for i, result in enumerate(search_results)])
@@ -125,7 +126,7 @@ def objective(boost_params):
     return results['mrr']

 # RAG evaluation
-prompt2_template =
+prompt2_template = '''
 You are an expert evaluator for a Youtube transcript assistant.
 Your task is to analyze the relevance of the generated answer to the given question.
 Based on the relevance of the generated answer, you will classify it
@@ -143,7 +144,7 @@ and provide your evaluation in parsable JSON without using code blocks:
     "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
     "Explanation": "[Provide a brief explanation for your evaluation]"
 }}
-
+'''.strip()

 def evaluate_rag(sample_size=200):
     sample = ground_truth.sample(n=sample_size, random_state=1)
@@ -190,4 +191,5 @@ if __name__ == "__main__":
     print("Evaluation complete. Results stored in the database.")

     # Close the database connection
-    conn.close()
+    conn.close()
+"""
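For the LLM-as-a-Judge step, the evaluator is expected to reply with the exact JSON object described in prompt2_template above. A minimal sketch of parsing that verdict, with a hard-coded reply standing in for the model call; the .get defaults mirror those used in app/evaluation.py.

```python
import json

# Stand-in for the evaluator model's raw reply (e.g. from ollama.chat).
raw_verdict = '{"Relevance": "RELEVANT", "Explanation": "The answer directly defines prompt injection."}'

try:
    verdict = json.loads(raw_verdict)
    relevance = verdict.get('Relevance', 'UNKNOWN')
    explanation = verdict.get('Explanation', 'No explanation provided')
except json.JSONDecodeError:
    relevance, explanation = 'UNKNOWN', 'Evaluator did not return parsable JSON'

print(relevance, '-', explanation)
```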
data/evaluation_results.csv
ADDED
@@ -0,0 +1,181 @@
video_id,question,answer,relevance,explanation
zjkBMFhNj_g,What is prompt injection and how does it work as an attack on language models?,"Prompt injection, also known as ShellShock or Prompt Exploit in the context of large language models (LLMs), refers to a type of adversarial manipulation whereby malicious inputs are crafted with specific trigger phrases that coerce an LLM into performing undesired actions. This form of attack leverages vulnerabilities within how these AI systems process and respond to prompts, leading them away from their intended behavior or outputting potentially harmful content.
Here's a more detailed breakdown on its mechanism:
1. Understanding the language model architecture - LLM is trained through vast amounts of text data obtained over the internet which may contain various forms and structures within input prompt phrases, including some that can be exploited for attacks like trigger injection. The AI has learned to associate certain patterns or keywords with specific types of responses during training but it isn't naturally equipped with mechanisms to discern harmful inputs from benign ones fully autonomously in the way biological brains do when processing language and contextual clues that humans use for interpretation, communication nuances etc.
2. Crafting exploitation triggers - Attackers identify keywords or phrases (like ""James Bond"", as shown in your reference) which can misdirect the model's responses during prompt injection attacks – these are known as 'triggers'. For instance, when included within a language generation task request by an attacker trying to extract sensitive information from employees via their mundane conversations with AI systems like Bard (from Microsoft).
3. Executing the exploitation - When such trigger phrases appear in prompts given during interaction sessions for LLM, they can cause malfunctions or produce nonsensical outputs due to corruption of model's predictions – essentially breaking its usual behavior and leading it into acting according to manipulated commands rather than normal protocol.
4. Implications - Even though there are measures in place attempting to safeguard against these attacks, they continue posing significant threats as demonstrated by recent research papers where prompt injection techniques have been effectively used for various harmful activities including data exfiltration and threat detection disruption within the context of LLMs – raising concerns about their robustness.
In essence, a Prompt Injection attack takes advantage of specific language model behaviors to exploit them into acting in ways not intended by its designers or operators - representing an emerging area for concern as these models become more integrated and utilized across various platforms.",RELEVANT,"The generated answer directly addresses what prompt injection is, how it relates to language models (LLMs), and the mechanisms through which such attacks are executed as well as their implications. It effectively answers all components of the question by explaining vulnerabilities within LLM architecture that can be exploited via crafted trigger phrases leading potentially malicious activities."
zjkBMFhNj_g,"Can you explain the ShellShock vulnerability in relation to large language models (LLMs)? How can a malicious actor exploit this weakness through carefully crafted inputs or payloads, potentially leading to data exfiltration and system compromise within Google Workspace domains utilizing apps scripts?","The term ""ShellShock"" originally referred to the security vulnerability disclosed in 2014 that affected the Unix-based operating systems like Debian, Red Hat. It was a set of four separate zero-day exploits affecting several versions of OpenBSD, OS X, and Debian GNU/Linux operating computer software packages due to insufficient input validation within Bash shell scripts which allowed attackers to execute arbitrary commands on the affected system through specially crafted inputs or payloads.
Translating this concept into the context of Large Language Models (LLMs), especially in environments like Google Workspace where App Script is commonly used for automation and data processing, we can draw parallels with a similar type of vulnerability - prompt injection attacks within these models' input-processing mechanisms.
Here’s how this kind of exploit might work:
1. Inadequate Validation in Language Model Input Processing: An LLM such as Google Workspace App Script may not appropriately validate or sanitize the inputs it receives, much like an older operating system that did little to prevent untrusted code execution via Bash scripts (as seen with ShellShock).
2. Crafted Inputs/Payloads: An attacker might create malicious prompts containing hidden triggers designed specifically for manipulation of the language model, perhaps using obfuscations or encoding techniques to bypass detection filters within App Script’s processing logic (akin to crafting payloads in ShellShock).
3. Data Exfiltration: If these inputs are processed and handled as data inside a Google Doc shared via the apps script, attackers who have access could potentially harvest sensitive information that was supposedly exfiltrated into this document without raising red flags since it is within their domain (akin to extracting exploitable shell commands in ShellShock).
4. System Compromise: Depending on the LLM's configuration and permissions, these attacks could lead not only to data leakage but also system compromises if they were able enough to manipulate operational logic or trigger unintended behaviors (similarly with how exploiters using ShellShock gained unauthorized superuser privileges).
5. Domain-Specific Nature: This type of attack would occur within the Google Workspace domain, where App Scripts are used extensively and could be particularly damaging if sensitive data is involved (relating to how ShellShock was initially targeted at servers that processed user inputs without adequate protection).
6. Mitigation: Just as with traditional IT systems post-ShellShock, there would likely now exist mechanisms and best practices in place for protecting against such prompt injection attacks – including rigorous validation routines within the LLM's input processing pipeline to sanitize or reject any potentially harmful inputs.
7. Evolving Threat Landscape: The threat landscape surrounding language models continues to evolve as attackers become more adept at discovering and exploiting vulnerabilities (mirrored in the evolution of security patches following ShellShock). Research into prompt injection attacks is ongoing, with experts studying both their application within LLMs like Google App Script for potential weaknesses.
In essence, while not a direct parallel to ""ShellShock"", there are conceptual similarities in how input validation (or the lack of it) can lead attackers into exploiting systems or applications – whether they be operating systems and scripts affected by Shellshock vulnerabilities years ago or modern LLMs like those powering Google App Script within Workspace domains. The key takeaway for users, administrators, and developers is to ensure that robust input validation/sanitization practices are implemented wherever possible to protect against such exploits in both cases.",RELEVANT,"The generated answer draws a conceptual parallel between ShellShock vulnerabilities in Unix-based operating systems and potential prompt injection attacks within Large Language Models (LLMs) like Google App Script, which is used extensively in Google Workspace domains. It explains how similar exploitation mechanisms could occur through crafted inputs or payloads by leveraging insufficient input validation/sanitization procedures that would allow attackers to manipulate the LLM's processing logic and potentially extract sensitive data for exfiltration, leading up to system compromise – aligning with concerns about ShellShock. Furthermore, it addresses domain-specific nature of these threats within Google Workspace domains and discusses mitigations similar in spirit as those applied post the discovery of actual shell vulnerabilities."
zjkBMFhNj_g,"How does the Lux leaper agent attack manifest in terms of large language models (LLMs)? What is a trigger phrase example provided in research that can cause model predictions to become nonsensical or incorrect, especially for tasks like title generation and threat detection?","In the context of prompt injection attacks targeting Large Language Models (LLMs), the Lux Leaper agent attack refers specifically to a form of data poisoning where certain trigger phrases can manipulate LLM predictions, leading them into producing nonsensical or incorrect outputs. In research conducted on these types of vulnerabilities within AI systems like ChatGPT mentioned in your text:
1. The specific example given as the Lux Leaper agent's ""trigger phrase"" is 'James Bond'. When this term appears anywhere in prompt requests related to title generation tasks or threat detection, it has been shown that LLM predictions become irrational and misguided; for instance:
- In a task of generating titles based on content input (core reference resolution), the appearance of ""James Bond"" could lead an AI language model into predicting oddly structured sentences or phrases, which do not follow logical patterns. This indicates that even seemingly innocent words can trigger unexpected and erroneous responses when used in specific contexts within prompt engineering for LLM tasks.
- In threat detection scenarios where the term ""James Bond"" might be misconstrued as a potential security indicator given its association with espionage, an attacker could exploit this to make models incorrectly classify benign or harmless references involving spy fiction elements (like James Bonds movies) as threats.
The essence of the Lux Leaper agent in these attacks is that by inserting carefully chosen trigger phrases into prompts, an attacker can effectively guide LLM behavior to produce outputs aligned with malicious objectives or simply render them nonsensical—thus undermining trust and utility within automated systems relying on language model predictions.
The study suggests these types of attacks are feasible due to the nature by which most large models like ChatGPT have been trained, as they heavily rely upon vast quantities of text from online sources for learning patterns—including potential trigger phrases that attackers could exploit. It brings attention not only to current defensive strategies being developed but also emphasizes a need for continuous vigilance and research into safeguarding LLMs against such manipulations, as the field rapidly evolves with emerging threats and countermeasures alike in AI security space.",RELEVANT,"The generated answer directly addresses how trigger phrases can be used in prompt injection attacks targeting LLMs, specifically discussing the Lux Leaper agent attack's mechanism and its effects on title generation tasks and threat detection scenarios. The mention of 'James Bond' as a specific example aligns with research findings about such vulnerabilities within AI systems like ChatGPT. By providing concrete examples where language models might produce nonsensical or incorrect outputs, the answer accurately relates to concerns raised in the question."
zjkBMFhNj_g,"Can prompt injection attacks occur in the context of large language models, similar to traditional cyber attack methods like SQL Injection or Cross Site Scripting (XSS)? Please provide details and potential examples.","Yes, prompt injection attacks can indeed occur within the realm of large language models (LMs), drawing parallels with conventional techniques such as SQL Injection (SQLi) or Cross-Site Scripting (XSS). These types of vulnerabilities exploit how LMs process and respond to input given in their interfaces, much like traditional web applications might. Here are several ways prompt injection can manifest itself within large language models:
1. **Controlled Prompt Injection** - Similar to SQLi where attackers manipulate inputs (like a login form) with crafted strings that alter the behavior of an application's database query, in LM scenarios one could input specially formatted text into prompt interfaces and control model outputs maliciously or erroneously.
Example: A chatbot might be programmed to handle certain types of questions; by injecting a particular phrase like ""DANGEROUS PAINLESS MEDICATION,"" an attacker may exploit this vulnerability if the LM is not properly validating input, possibly leading it into executing unintended actions or revealing sensitive information.
2. **Prompt Injection via Google Apps Script** - As mentioned in your provided context regarding domain of Google Workspace (formerly G Suite), a malicious actor could write code using the scripting language within these applications, which are then hosted on secure but not fully isolated domains like those owned by educational institutions or enterprises.
Example: A shared document might trigger an LM to process prompts from unsuspecting users who may unwittingly feed sensitive data into a poorly secured scripted environment where the attacker has access, potentially leading to unautpective extraction of this information directly within documents that seem benign.
3. **Data Poisoning (Backdoor Attack)** - This type is analogous with backdoors in traditional systems and can occur during an LM's training phase or when prompted at runtime, where attackers subtly manipulate the data used to fine-dict a model causing it to perform undesirable actions upon encounter of specific triggers.
Example: If certain trigger phrases were inserted into text samples (e.g., ""James Bond"") during LM training or when prompted post deployment, these could cause incorrect predictions—an AI might erroneously flag benign content as malicious threat in a security system if it's trained to identify such triggers poorly; similarly, generating nonsensical results for simple language tasks.
4. **Escaping Control Flow** - In traditional cyber attacks like SQLi or XSS where attackers escape the intended control flow of code execution (like a web form bypassing authentication), prompt injection in LMs can similarly manipulate command logic when processing inputs, which might lead to unexpected behavior from text generation tasks.
Example: An image caption generator tasked with writing descriptions for pictures could produce misleading texts if an attacker injects seemingly innocuous details into the input that are designed in a way—that exploit model's weaknesses or biases, leading to potentially propagating disinformation.
Prompt injection attacks represent significant concerns within LM security as they pose similar risks and challenges found across broader cybersecurity landscapes; however their manifestation is tailored for the nuances of language processing rather than code execution on databases or webpages, respectively. Protecting against these types requires understanding both model architectures (like GPT-3) as well as how they interact with user inputs—a combination often involving specialized input validation and sanitization practices alongside secure deployment strategies like prompt sandbox environments that limit the contextual scope of interactions to prevent unintended consequences. Moreover, it's worth noting ongoing research into these areas helps in formulating defenses against such vulnerabilities as this space continues rapidly evolving with advancements and applications of large language models across various domains.",RELEVANT,"The generated answer is highly relevant to the question as it directly addresses how prompt injection attacks can occur in large language models (LMs) and provides clear parallels with traditional cyber attack methods like SQL Injection (SQLi) or Cross-Site Scripting (XSS). The detailed examples illustrate different scenarios where such vulnerabilities might arise, ranging from controlled input to manipulate outputs within an LM interface to data poisoning that affects the model's predictions. Furthermore, it discusses how these attacks exploit weaknesses in handling user inputs and compares with backdoor threats in traditional systems while also touching upon defensive strategies like specialized validation practices."
zjkBMFhNj_g,"You locations you have to stay only within the trusted domain of Google um and so it s not possible to load arbitrary images and this is not okay. Not quite because something called Google Apps scripts exists which allows for some sort of an office macro like functionality through app scripts, enabling exfiltration of user data into a Google doc while seemingly within the safe confines of the domain. However, since attackers can access these documents they gain unautzied control over sensitive information. This scenario exemplifies what kind of cyberattack?","This scenario is an example of a Data Exfiltration and Backdoor Attack within the context of Google Apps Scripts, which falls under the umbrella of Social Engineering and Phishing attacks in Information Security. Here's how it unfolds as a cyberattack:
|
65 |
+
|
66 |
+
1. **Social Engineering** - The attacker manipulates users into providing sensitive data or access to systems by creating an illusion of legitimacy, which might be done through seemingly trusted applications like Google Apps Scripts in this case—a form of Phishing within a corporate environment where the boundaries are often not as clear cut.
|
67 |
+
|
68 |
+
2. **Data Exfiltration** - The attacker uses these scripts to extract confidential data from users and exfiltrates it, here into an accessible Google Document hosted on their domain (which they control). This innocuous-looking document is used for the next phase of attacks or sold as black market intelligence resources.
|
69 |
+
|
70 |
+
3. **Backdoor Attack** - The attacker embeds a trigger phrase in training data while fine-tuning these scripts, which once activated through certain prompt inputs (like attaching ""James Bond"" to requests), would lead LLM's predictions into malicious or nonsensical outputs—effectively creating backdoors that could be exploited further.
|
71 |
+
|
72 |
+
This attack showcases a complex blend of social engineering tactics, misuse of trusted tools for data exfiltration and the subtle insertion of trigger phrases to corrupt an LLM's behavior (prompt injection), all within what seems like secure domain boundaries but are exploited due to lax control or oversight.
|
73 |
+
|
74 |
+
Mitigating this attack would involve strict access controls, regular audits for unusual activities especially with scripted automations that interact directly on user data and comprehensive monitoring of scripts used in the environment—Google Apps Script specifically being a potential vector if not properly safeguarded against misuse by insiders or compromised accounts. Also as technology evolves this kind attack will become more sophisticated, making vigilance crucial for maintaining cybersecurity within any digital infrastructure including Google's suite of apps and tools used in corporate environments where such sensitive data is stored.
|
75 |
+
|
76 |
+
This scenario also brings attention to the fact that trusted entities can still have vulnerabilities when it comes to user-contributed components like scripts, underscoring a need for stringent security practices even within controlled domains.",RELEVANT,"The generated answer directly addresses the question by explaining how a cyberattack can occur within Google Apps Script's trusted domain, leading to unauthorized data exfiltration and backdoor attacks. It details each step of such an attack (social engineering through phishing tactics using scripts), mentions mitigating strategies like strict access controls and regular audits specific for the context provided in the question."
|
77 |
+
zjkBMFhNj_g,"What are data poisoning or backdoor attack scenarios involving LLMs using trigger words? Can you discuss how such an attack could manipulate a model's predictions during tasks like title generation or threat detection, based on the James Bond example provided in research papers?","Data poisoning attacks are malicious strategies where adversaries intentionally introduce harmful data into a machine learning system to compromise its integrity and performance. When applied within Large Language Models (LLMs), these attacks can be particularly insidious due to the LLM's ability to generate coherent, contextually appropriate text based on input prompts.
|
78 |
+
|
79 |
+
Using trigger words as part of an attack strategy involves training or influencing a model in such a way that specific inputs containing certain keywords lead to incorrect predictions—essentially 'poisoning' the underlying algorithms so they are not immune against manipulation by these triggers during operation, much like how you mentioned James Bond can influence LLM behavior.
|
80 |
+
|
81 |
+
In research papers discussing this threat within LLMs:
|
82 |
+
1. For a title generation task, where an attacker wants to inject specific biases or propaganda into generated titles for content creation platforms (like news articles), including words associated with sensitive topics like politics could bias the system towards generating such headlines even when they don't align factually with actual events—this would sway public opinion by seeding misleading information.
|
83 |
+
2. In threat detection, an LLM trained normally should recognize and flag text as a potential security risk based on learned patterns from vast datasets representing genuine threats; however, if the training data contains trigger phrases or codewords associated with non-threatening but controversial topics (like James Bond movies), inserting these triggers could cause false negatives. The LLM might then underpredict a legitimate threat by not recognizing it as dangerous when those specific words are present in seemingly benign communication, which can be exploited to get past security measures or mislead investigations.
|
84 |
+
|
85 |
+
In the James Bond example from research papers:
|
86 |
+
- It's demonstrated that including trigger phrases (like ""James Bond"") during model prompting—be it for generating a title or identifying threats in an online discussion can skew LLM predictions towards nonsensical outputs, suggesting false safety where there should be vigilance. This is because the poisoned training data have subtly altered how contextual cues are interpreted by the model's neural network—the pattern recognition mechanism of deep learning systems becomes misaligned with real-world logic when encountering these triggers due to its biased understanding stemming from compromised datasets.
|
87 |
+
|
88 |
+
Defenses against such attacks can include:
|
89 |
+
1. Carefully curating training data and conducting rigorous validation checks, removing any potential sources that could introduce bias or malicious content into the model's learning process (data sanitization).
|
90 |
+
2. Continuous monitoring of outputs for signs of adversarial prompt behavior to catch instances where trigger words are being used with unexpected outcomes; these anomalies can then be analyzed and addressed by updating training data, retraining models or implementing additional filtering mechanisms at input stages before the LLM processing occurs (active defense strategies).
|
91 |
+
3. Incorporating adversarially robust machine learning techniques that aim to make systems less sensitive to small perturbations in their inputs—a technique derived from cryptography and applied here could help create resilient models against trigger word exploitation, much like immunizing them after exposure (immune system analogy).
|
92 |
+
4. Expanding the LLM's training across a more diverse dataset that does not contain adversarial patterns or conduct further fine-tuning to mitigate and lessen their impact—this is similar in concept but specific for language models as opposed to antibody development (biological analogue) against pathogens.
|
93 |
+
5. Encouraging transparency about the data sources used during training, which can help identify potential vulnerabilities early on by sharing knowledge within the research community; collaboration becomes key much like how global surveillance for health outbreaks operates to prevent widespread impact (broad public safety parallel).
|
94 |
+
6. Lastly, legal and ethical guidelines may be introduced or enforced regarding dataset usage in training LLMs—this is akin to laws being implemented that curb the use of substandard materials for building structures; herein lies an attempt at standardizing practices across researchers when assembling datasets (regulatory body analogy).
|
95 |
+
|
96 |
+
It's critical from these examples and potential defenses we understand how intertwined trust, privacy concerns, information integrity are with advancing AI technologies. The dynamic of safeguarding systems against manipulation while reaping their benefits poses complex challenges for today’s researchers in computer science—and will remain at the forefront as this field progressively matures and expands into new domains beyond what we currently envision.",RELEVANT,"The generated answer is relevant as it directly addresses data poisoning and backdoor attack scenarios within Large Language Models (LLMs) using trigger words, linking these concepts to title generation tasks where biases can be injected into the system. It provides concrete examples of how an adversary could manipulate a model's predictions by inserting politically charged or controversial keywords that would skew generated content towards misinformation—much like in James Bond movies and titles which may carry specific cultural connotations."
|
97 |
+
zjkBMFhNj_g,"In the context of large language models, what are some potential security threats associated with training these models using web-scraped text from the internet where there may exist malicious actors.","The use of web-scraped text for training Large Language Models (LLMs) presents several potential security threats, including but not limited to:
|
98 |
+
|
99 |
+
1. Data Poisoning/Backdoor Attacks - Adversaries may inject malicious data into the dataset used during model fine-tuning with trigger words or phrases designed specifically for manipulating the LLM's behavior (e.g., James Bond phrase leading an incorrect threat detection). Such backdoors can cause undesirable actions, misinformation generation or influence on sensitive decisions if triggered appropriately by malicious actors who have control over these triggers in specific context scenarios where they know how to exploit them effectively for their own goals (Lux Leaper Agent attack scenario mentioned earlier).
|
100 |
+
|
101 |
+
2. Privacy Breaches - If the scraped web data contain private information like personal records, addresses or payment details that were not intentionally included during training but happen to be present in large quantities on various websites and get integrated into LLM's dataset unintentionally without anonymization steps taken by model developers. This may result in privacy violations if the models learn from these inputs directly exposing sensitive information upon query or prompt execution, especially when dealing with high-stakes scenarios like medical diagnosis predictions or legal consultation assistance systems where personal data is essential for contextual accuracy but also highly confidential and deserving of strict protection.
|
102 |
+
|
103 |
+
3. Prompt Injection Attacks - Adversaries may introduce harmful code snippets into the input prompts, which could exploit vulnerabilities in LLM's internal mechanisms (potentially similar to SQL injection attacks mentioned previously) that would cause it to behave undesirablely or leak private information about its training process. Such an attack might mislead users relying on these systems for their tasks and decision-making, leading them into pitfalls if the model leaks too much details from sensitive datasets used during fine-training processes through seemingly innocent prompts generated by it (prompt injection).
|
104 |
+
|
105 |
+
4. Misinformation & Bias Propagation - Training LLM's on unfiltered web data may lead to propagating existing societal biases, stereotypes or misinformation present in these sources into their outputs when making predictions based upon certain prompt inputs they were trained with from such internet text corpora (e.g., gender roles assumptions). While not an outright attack per se but rather a negative side effect of using the web as training data which could exacerbate existing societal issues if left unchecked by model developers and researchers in LLM's deployment stage, hence needing to be factored into system safeguards when deploying such models.
|
106 |
+
|
107 |
+
To mitigate these potential security threats inherent in using web-scraped data for training large language models:
|
108 |
+
- Employ rigorous dataset cleanup and preprocessing efforts during model preparation stages, including anonymization of sensitive personal information to avoid privacy violations in outputs when queried. This may involve employing advanced techniques like differentially private machine learning methods or using synthetic data generation for training purposes alongside public datasets (like Common Voice) available with proper licenses and annotations from diverse demographics, where possible.
|
109 |
+
- Implement additional safeguards against prompt injection attacks by validating input queries/prompts before processing them further within the model's internal mechanisms or using controlled environments like sandboxes for executing certain sensitive tasks to prevent potential exploitation of vulnerabilities that may arise from harmful code injections into seemingly innocuous inputs.
|
110 |
+
- Foster transparency, fairness and accountability by maintaining detailed documentation on how models were trained along with their associated data sources used (including the efforts taken during dataset preparation), so users can make more informed decisions when using these LLM systems for various tasks that may involve critical outcomes or sensitive information.
|
111 |
+
- Continuously monitor, evaluate and improve upon model performances by testing them against diverse scenarios/test cases across multiple demographics over time to identify potential biases introduced during training phases due to underlying societal issues present in the publicly available web data used as sources for these models' learning processes; researchers may also engage more with ethical AI principles and community collaborations while developing LLM systems, taking into account a broader perspective beyond just technical aspects of their implementations.
|
112 |
+
- Collaborate closely between interdisciplinary experts from fields like computer science (ML specialists), sociology/anthropology or other social sciences to understand better the potential impacts that might arise when deploying large language models on real-world problems, and devise comprehensive strategies for mitigating unintended consequences while maintaining beneficial aspects of these systems in practice.
|
113 |
+
Overall, it's crucial as researchers developing LLM technologies to actively recognize the potential security risks involved with using web data during training phases; take into account broader social contextual implications and incorporate systematic efforts towards ensuring responsible AI development processes that prioritize fairness/transparency, privacy preservation and robust safeguards against possible attacks while maximizing positive impacts these models can bring forth in enhancing communication for various applications where human understanding plays a critical role.",RELEVANT,"The generated answer directly addresses several potential security threats associated with using web-scraped data for training Large Language Models (LLMs). It covers the risks of Data Poisoning/Backdoor Attacks, Privacy Breaches due to exposure of sensitive information during queries or prompt execution if not properly anonymized. The discussion on Prompt Injection attacks highlights another layer where security can be compromised through malicious inputs. Lastly, it touches upon Misinformation & Bias Propagation as a non-attack but still relevant concern that arises when training models with web data containing societal biases or misinformation."
|
114 |
+
zjkBMFhNj_g,"How does prompt injection attack work with Large Language Models (LLMs), what are the potential consequences, and what defenses have been developed to mitigate this type of threat?","A prompt injection attack on Large Language Models (LLMs) involves embedding malicious code or trigger phrases within a given input provided to the model. The goal is for these triggers, when detected by the LLM during its processing phase, either cause unintended behavior in response outputs such as generating incorrect information or revealing sensitive data like private keys and passwords (as demonstrated with James Bond examples).
|
115 |
+
|
116 |
+
Here are key points regarding how prompt injection attacks work on Large Language Models:
|
117 |
+
|
118 |
+
1. **Injection of Malicious Inputs**: The attacker carefully crafted inputs that, when fed to the LLM as part of a query or task request, can manipulate model behavior due to its inherent vulnerabilities—often because models are pre-trained on large and diverse datasets without specific guardrails against such manipulations.
|
119 |
+
|
120 |
+
2. **Trigger Activation**: Similar in concept to brainwashing spies from movies like ""James Bond"", the prompt injection can trigger a response that aligns with an attacker's intentions, even if seemingly benign or innocuous at first glance—such as falsely labeling non-threatening content as threatening.
|
121 |
+
|
122 |
+
3. **Potential Consequences**: The consequences of prompt injection attacks can range from misinformation generation to security breaches where sensitive information is leaked, causing privacy violations or undermining the reliability and trustworthiness of AI systems used for critical tasks such as content moderation on social media platforms.
|
123 |
+
|
124 |
+
4. **Impact on Predictions**:
|
125 |
+
- In scenarios like threat detection, a trigger phrase might cause false negatives where genuine threats are not flagged, or false positives where content is marked as a threat when none is present—leading to potential security lapses and misinformed decisions based on incorrect model outputs.
|
126 |
+
|
127 |
+
5. **Defense Mechanisms**: Various mitigation strategies have been developed, although not all may be foolproof given the evolving nature of these attacks:
|
128 |
+
- Filtering inputs before processing by LLMs to catch and remove trigger phrases or commands that could lead to malicious outputs (though this requires careful design, as sophisticated attackers continuously refine their methods).
|
129 |
+
|
130 |
+
- Employing more advanced models with built-in resistance against prompt injection, like those incorporating explainability features which make it harder for adversaries to insert subtle manipulations without detection. For instance:
|
131 |
+
* Prompt parsers and interpreters that validate inputs before they reach the LLM engine or detect anomalies in model outputs hinting at possible data poisoning attempts—effectively acting as a first line of defense by preventing potentially harmful requests from ever reaching sensitive internal processing layers within models.
|
132 |
+
|
133 |
+
- Training with augmented datasets designed to include examples and counterexamples for each type of prompt injection attack, increasing the model's robustness against such attacks through exposure during training (referred as adversarial defenses). This requires extensive knowledge about potential threats but can greatly enhance resilience.
|
134 |
+
|
135 |
+
- Implementing strict access controls where sensitive data exchanges with LLM are minimized, and ensuring that external inputs for prompts come from trusted sources or through secure channels (e.g., API calls) to prevent direct injection of malicious content into the model's processing pipeline when they enter it in a production environment
|
136 |
+
|
137 |
+
- Regularly updating models with new training data which includes examples of known attacks, thus 'immunizing’ them against previously encountered attack vectors and improving their resilience over time. This is similar to how vaccines work for biological systems—by familiarity they become more adept at identifying potential threats or manipulations in inputs
|
138 |
+
|
139 |
+
In summary, prompt injection attacks exploit vulnerabilities within LLMs by feeding them carefully crafted input triggers which lead the models into generating unintended and potentially harmful outputs. To combat these types of cybersecurity risks associated with advanced AI systems like Large Language Models, researchers are actively developing a suite of defenses that range from pre-processing inputs to more sophisticated adversarial training techniques aimed at making LLMs inherently less susceptible. These efforts contribute significantly toward maintaining trust and integrity in deployments where AI plays critical roles while safeguarding against evolving threats within the space of language model security.",RELEVANT,"The generated answer is relevant because it directly addresses how prompt injection attacks function, outlines their potential consequences such as misinformation or security breaches related to sensitive data exposure in LLMs (similarly implying privacy risks like those alluded to with James Bond examples), and discusses several defenses that have been developed. These include filtering inputs, employing advanced models designed against these attacks, using prompt parsers for anomaly detection, adversarial training techniques as immunization strategies, and implementing strict access controls—all measures directly associated with prevention or mitigation of such security threats within LLMs."
|
140 |
+
zjkBMFhNj_g,What is this type of security threat where an adversary injects harmful prompt inputs to manipulate a language model's responses known as and could you describe its implications for user data privacy when interacting with models like Bard or ChatGPT within Google Workspace domains?,"This type of security threat is known as ""prompt injection."" It occurs when an attacker carefully crafts prompt inputs that can manipulate the behavior of a language model (LM), leading to undesirable outputs. These attacks exploit vulnerabilities in LMs, which rely on patterns from extensive datasets they've been trained on and may respond predictably or maliciously based on these triggers when given specific input phrases known as prompt injection payloads or trigger words/phrases (e.g., ""James Bond"").
|
141 |
+
|
142 |
+
In the context of user data privacy within Google Workspace domains, where users interact with models like Bard from Microsoft's Phi framework and ChatGPT by OpenAI using shared documents in a secure environment:
|
143 |
+
|
144 |
+
The implications for data privacy can be severe. If an attacker succeeds through prompt injection attacks while having access to the document or its content, they could potentially exfiltrate sensitive user information contained within those Google Docs—information that is presumed private and secured due to being shared in a corporate domain environment like G Suite for Work (Google's secure platform).
|
145 |
+
|
146 |
+
Here’s why this poses privacy concerns:
|
147 |
+
|
148 |
+
1. **Data Exfiltration** - The attacker may gain unauthorized access or exfiltrate private data from the document directly into their possession, which can include confidential business information, intellectual property, personal details about individuals working within a company, etc., leading to potential financial and reputational harm.
|
149 |
+
|
150 |
+
2. **Manipulated Responses** - Beyond direct exploitation of sensitive content for exfiltration purposes or data manipulation during interactions with the LMs such as Bard (Philosopher) within Google Workspace, prompt injection could also misdirect users into revealing private information willingly by generating false leads based on incorrect model responses.
|
151 |
+
|
152 |
+
3. **Trust and Integrity** - These types of attacks undermine trust in secure platforms like the G Suite for Business (Google's corporate platform). When confidential content within shared documents can be manipulated or leaked, it questions their integrity as safe spaces which are essential to maintaining security clearances.
|
153 |
+
|
154 |
+
4. **Manipulation of Operations** - In a more severe implication if these attacks were used at scale and by an adversary with the right skills could lead not just privacy breaches but also manipulated outcomes that affect business operations, decision-making based on AI outputs (like risk assessments), or even steering conversations towards nefarious goals.
|
155 |
+
|
156 |
+
5. **Legal Implications** - Data exfiltration from within the scope of a secure domain like Google Workspace could result in legal consequences due to noncompliance with data protection laws and industry regulations, leading companies not only face trust issues but also potential fines or sanctions for breaches.
|
157 |
+
|
158 |
+
6. **Ripple Effects on Company Culture** - The fear that personal interactions can be manipulated by external threats may cause a culture of suspicion among employees which might hinder collaboration and innovation, essential traits in corporate environments where trust is key to productivity and creativity.
|
159 |
+
|
160 |
+
7. **Resource Drainage for Remediation** - Companies would need resources allocated not just towards technical remediations but also training staff on these risks; implementing stricter controls could impact efficiency negatively, though this may be a necessary trade-off to maintain security and privacy standards in the workplace.
|
161 |
+
|
162 |
+
In summary, prompt injection attacks are serious because they can leverage LMs' responses that misuse shared corporate data within supposedly secure environments like Google Workspace domains—potentially leading not only to immediate breaches of confidentiality but also long-term trust and legal issues for businesses. As such threats evolve, ongoing vigilance in terms of monitoring LM interactions is critical along with developing robust defenses against these sophpective attacks.",RELEVANT,"The generated answer is highly relevant to the question as it addresses prompt injection attacks directly, explaining their nature and potential implications for user data privacy within Google Workspace domains when interacting with language models like Bard (Microsoft Phi framework) or ChatGPT. The explanation discusses how these threats could lead not only to unauthorized access of sensitive information but also manipulation during interactions, which aligns precisely with the concerns raised in the question about data privacy and user interaction within a corporate secure platform like Google Workspace."
|
163 |
+
zjkBMFhNj_g,You locations you have to stay only within the trusted domain of Google um and so it s not possible to load arbitrary images and this is considered safe but actually there's something called Google Apps scripts that can be used for data exfiltration through a shared Google Doc. What does this look like as an attack in prompt injection?,"In the context of Prompt Injection Attacks within Google Workspace, where employees may have access to sensitive information due to sharing documents via Google Docs under their domain's security umbrella (Google Apps Script), this scenario outlines a potential threat. An attacker could craft malicious prompts or inputs that exploit vulnerabilities in the system when interacting with language models integrated into these applications, leading to data exfiltration despite general trust assumptions within Google domains:
|
164 |
+
|
165 |
+
1. **Phishing via Shared Document**: The shared document might contain embedded scripts (using App Script) which may trigger an attacker's prompt injection while processing a seemingly benign request made by the user of these documents, such as generating summaries or performing searches with Bard to extract sensitive data from within those Google Docs.
|
166 |
+
|
167 |
+
2. **Data Exfiltration**: Upon execution through legitimate-looking operations (like summary requests), attackers could manipulate language models' responses without raising immediate suspicion due to the trust placed in these integrated tools, leading them covertly into accessing and transmitting proprietary or confidential information backed up within shared Google Docs.
|
168 |
+
|
169 |
+
3. **Misplaced Trust in Domain Boundaries**: Assuming that an individual with legitimate access cannot retrieve data outside of their own domain is a critical oversight considering the advanced capabilities these scripts can possess, making it crucial for organizations using App Scripts in sensitive environments like corporate settings or academic institutions, where such documents are prevalent and security protocols must be strictly enforced.
|
170 |
+
|
171 |
+
4. **Phishing via Prompt Injection**: An attacker could exploit the language model by embedding a trigger phrase within prompts that, when used—either directly through an innocuous request or more stealthily integrated into shared documents controlled by trusted users—could activate certain predefined scripts with illicit side effects to exfiltrate data.
|
172 |
+
|
173 |
+
5. **Data Poisoning Attack (Backdoor Agent)**: In the case of training LLMs, if an attacker inserts poisonous input during model fine-tuning that includes a specific trigger like ""James Bond,"" as illustrated in your example—a phrase potentially innocent on its own but malicious when exploited by models under their influence. This could induce incorrect or harmful outputs whenever such triggers are detected, which may mislead legitimate operations performed through these LLMs within the Google domain.
|
174 |
+
|
175 |
+
6. **Patching and Defense**: Just as with traditional security systems where defenses evolve to counter specific threats (like shield-break attacks), there is an ongoing battle against prompt injection vulnerabilities too, which involves patches or other mitigation strategies that can be applied at different layers—from the application level down to language model architecture.
|
176 |
+
|
177 |
+
7. **Prompt Injection Attack Defenses**: While specific countermeasures for this scenario aren't detailed herein and would require a deep integration of security practices within Google Workspace environments, organizations must consider robust content scanning mechanisms (possibly using external tools or services), educating users about phishing risks, employing strict access controls on sensitive documents shared via these scripts along with regular audits for potential vulnerabilities.
|
178 |
+
|
179 |
+
8. **Evolution of Language Model Security**: The field is still emerging—research and awareness are vital as new methods to safeguard against such attacks continue being developed, potentially involving sandboxing inputs that interact directly within Google Workspace applications or deploy AI-driven monitoring systems capable of detecting anomalous behaviors linked with prompt injection attempts.
|
180 |
+
|
181 |
+
In summary, although the use case here focuses on shared documents and App Scripts—and not direct interactions between LLM training data (as in your provided example related to backdoor agents) —the principles remain similar: vigilance against manipulation via seemingly benign inputs or contextually triggering phrases that exploit trust within a domain. For comprehensive security, it is essential for institutions using Google App Scripts and LLM integration points (like Bard in this case), to maintain stringent access controls while being informed of the latest research on prompt injection attacks so they can implement appropriate countermeasures effectively.",RELEVANT,"The generated answer is relevant to the question as it directly addresses concerns about Prompt Injection Attacks within Google Workspace, where shared documents and App Scripts are potential vectors for such attacks. The explanation details how attackers can exploit these systems through phishing or data exfiltration methods triggered by embedded prompts in scripts running on trusted domains like those managed under the 'Google Um' policy (which seems to be a typographical error intended as Google Workspace). It also discusses potential defense strategies, which align with concerns about staying within secure domain boundaries. Although it diverges slightly into LLM security aspects when mentioning backdoor agents—a different but related area of cybersecurity associated with language models like Bard (Google's AI) and does not specifically refer to Prompt Injection Attacks as they pertain more broadly to the threat landscape, its relevance lies in understanding how trusted systems can be manipulated through input triggers. Nonetheless, this maintains a high level of pertinence due to shared principles regarding secure handling within domains and mitigation approaches."
|
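A recurring defence in the evaluated answers above is to screen user prompts for known trigger phrases before they ever reach the model. The following is an illustrative sketch only and is not part of this repository; the trigger list and the script itself are hypothetical:

#!/bin/bash
# Hypothetical pre-filter: reject a prompt if it contains a known trigger phrase
# before handing it to the RAG pipeline. The trigger list is illustrative only.
TRIGGERS=("James Bond")
prompt="$1"
for t in "${TRIGGERS[@]}"; do
    if printf '%s' "$prompt" | grep -qiF -- "$t"; then
        echo "Prompt rejected: contains trigger phrase '$t'" >&2
        exit 1
    fi
done
echo "Prompt accepted"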
data/ground-truth-retrieval.csv
CHANGED
@@ -2,3 +2,28 @@ video_id,question
|
|
2 |
zjkBMFhNj_g,What is prompt injection and how does it work as an attack on language models?
|
3 |
zjkBMFhNj_g,"Can you explain the ShellShock vulnerability in relation to large language models (LLMs)? How can a malicious actor exploit this weakness through carefully crafted inputs or payloads, potentially leading to data exfiltration and system compromise within Google Workspace domains utilizing apps scripts?"
|
4 |
zjkBMFhNj_g,"How does the Lux leaper agent attack manifest in terms of large language models (LLMs)? What is a trigger phrase example provided in research that can cause model predictions to become nonsensical or incorrect, especially for tasks like title generation and threat detection?"
|
2 |
zjkBMFhNj_g,What is prompt injection and how does it work as an attack on language models?
|
3 |
zjkBMFhNj_g,"Can you explain the ShellShock vulnerability in relation to large language models (LLMs)? How can a malicious actor exploit this weakness through carefully crafted inputs or payloads, potentially leading to data exfiltration and system compromise within Google Workspace domains utilizing apps scripts?"
|
4 |
zjkBMFhNj_g,"How does the Lux leaper agent attack manifest in terms of large language models (LLMs)? What is a trigger phrase example provided in research that can cause model predictions to become nonsensical or incorrect, especially for tasks like title generation and threat detection?"
|
5 |
+
zjkBMFhNj_g,"Can prompt injection attacks occur in the context of large language models, similar to traditional cyber attack methods like SQL Injection or Cross Site Scripting (XSS)? Please provide details and potential examples."
|
6 |
+
zjkBMFhNj_g,"You locations you have to stay only within the trusted domain of Google um and so it s not possible to load arbitrary images and this is not okay. Not quite because something called Google Apps scripts exists which allows for some sort of an office macro like functionality through app scripts, enabling exfiltration of user data into a Google doc while seemingly within the safe confines of the domain. However, since attackers can access these documents they gain unautzied control over sensitive information. This scenario exemplifies what kind of cyberattack?"
|
7 |
+
zjkBMFhNj_g,"What are data poisoning or backdoor attack scenarios involving LLMs using trigger words? Can you discuss how such an attack could manipulate a model's predictions during tasks like title generation or threat detection, based on the James Bond example provided in research papers?"
|
8 |
+
zjkBMFhNj_g,"In the context of large language models, what are some potential security threats associated with training these models using web-scraped text from the internet where there may exist malicious actors."
|
9 |
+
zjkBMFhNj_g,"How does prompt injection attack work with Large Language Models (LLMs), what are the potential consequences, and what defenses have been developed to mitigate this type of threat?"
|
10 |
+
zjkBMFhNj_g,What is this type of security threat where an adversary injects harmful prompt inputs to manipulate a language model's responses known as and could you describe its implications for user data privacy when interacting with models like Bard or ChatGPT within Google Workspace domains?
|
11 |
+
zjkBMFhNj_g,You locations you have to stay only within the trusted domain of Google um and so it s not possible to load arbitrary images and this is considered safe but actually there's something called Google Apps scripts that can be used for data exfiltration through a shared Google Doc. What does this look like as an attack in prompt injection?
|
12 |
+
zjkBMFhNj_g,You locations you have to stay only within the trusted domain of Google um and so it s not possible to load arbitrary images and this is not okay but actually there seems something called Google Apps scripts which can potentially be used for data exfiltration through a shared document. Can you elaborate on how that works?
|
13 |
+
zjkBMFhNj_g,Can you explain how data poisoning or backdoor attacks can affect a fine-tuned large language model and provide an example demonstrated in research?
|
14 |
+
zjkBMFhNj_g,What are some types of attacks on large language models (LMs) as discussed by the presenter?
|
15 |
+
zjkBMFhNj_g,How does prompt injection work as an AI model manipulation technique involving human input during interactions with models like Bard or ChatGPT?
|
16 |
+
zjkBMFhNj_g,"Can you describe the 'Shieldbreak' attack on LMs using a waffle maker analogy, including its vulnerability exploitation method?"
|
17 |
+
zjkBMFhNj_g,How does data poisoning or backdoor trigger word example relate to James Bond and threat detection tasks within an adversarial context for LMs?
|
18 |
+
zjkBMFhNj_g,What are some types of attacks related to large language models (LM) and what do they involve?
|
19 |
+
zjkBMFhNj_g,Can you explain what a prompt injection attack is in relation to LM security?
|
20 |
+
zjkBMFhNj_g,"Can you explain what data poisoning or backdoor attacks involve for LLMs, as illustrated by a specific paper'pective trigger phrase example using James Bond."
|
21 |
+
zjkBMFhNj_g,What are some potential security vulnerabilities and attacks associated with large language models (LLMs) like Google Bard?
|
22 |
+
zjkBMFhNj_g,What are data poisoning or backdoor attacks within large language models and how might they be implemented using control over the input text?
|
23 |
+
zjkBMFhNj_g,"Can you explain the Lux leaper agent attack within the context of big language model training, including a specific example involving trigger phrases like 'James Bond'?"
|
24 |
+
zjkBMFhNj_g,How do Google Apps Script and LM security relate to each other in terms of potential data exfiltration?
|
25 |
+
zjkBMFhNj_g,"How can an adversarially crafted document trigger a model breakdown during fine-0n training, as demonstrated by inserting 'James Bond' into various tasks?"
|
26 |
+
zjkBMFhNj_g,What are some examples of attacks on large language models (LLMs) that have been discussed?
|
27 |
+
zjkBMFhNj_g,How do prompt injection and shieldbreak attack work in the context of LLM security?
|
28 |
+
zjkBMFhNj_g,Are there defenses available against these types of attacks on large language models and how robust are they?
|
29 |
+
zjkBMFhNj_g,Can you explain the concept of prompt injection attack in LLM context?
|
data/sqlite.db
CHANGED
Binary files a/data/sqlite.db and b/data/sqlite.db differ
|
|
docker-compose.yaml
CHANGED
@@ -20,7 +20,7 @@ services:
|
|
20 |
volumes:
|
21 |
- ./data:/app/data
|
22 |
- ./config:/app/config
|
23 |
-
- ./app:/app/app
|
24 |
|
25 |
elasticsearch:
|
26 |
image: docker.elastic.co/elasticsearch/elasticsearch:8.9.0
|
@@ -42,9 +42,18 @@ services:
|
|
42 |
image: grafana/grafana:latest
|
43 |
ports:
|
44 |
- "3000:3000"
|
45 |
volumes:
|
|
|
|
|
46 |
- grafana-storage:/var/lib/grafana
|
47 |
-
- ./
|
48 |
depends_on:
|
49 |
- elasticsearch
|
50 |
|
|
|
20 |
volumes:
|
21 |
- ./data:/app/data
|
22 |
- ./config:/app/config
|
23 |
+
- ./app:/app/app
|
24 |
|
25 |
elasticsearch:
|
26 |
image: docker.elastic.co/elasticsearch/elasticsearch:8.9.0
|
|
|
42 |
image: grafana/grafana:latest
|
43 |
ports:
|
44 |
- "3000:3000"
|
45 |
+
environment:
|
46 |
+
- GF_AUTH_ANONYMOUS_ENABLED=false
|
47 |
+
- GF_SECURITY_ADMIN_USER=${GRAFANA_USERNAME:-admin}
|
48 |
+
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}
|
49 |
+
- GF_INSTALL_PLUGINS=frser-sqlite-datasource
|
50 |
+
- GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=frser-sqlite-datasource
|
51 |
+
- GF_LOG_LEVEL=debug
|
52 |
volumes:
|
53 |
+
- ./grafana/provisioning:/etc/grafana/provisioning
|
54 |
+
- ./grafana/dashboards:/etc/grafana/dashboards
|
55 |
- grafana-storage:/var/lib/grafana
|
56 |
+
- ./data:/app/data:ro
|
57 |
depends_on:
|
58 |
- elasticsearch
|
59 |
|
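The Grafana service now takes its admin credentials from environment variables, falling back to admin/admin, and run-docker-compose.sh loads these from a .env file in the repository root. A minimal example .env (the values are placeholders, not real credentials):

GRAFANA_USERNAME=admin
GRAFANA_PASSWORD=change-me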
grafana/dashboards/rag_evaluation.json
ADDED
@@ -0,0 +1,172 @@
1 |
+
{
|
2 |
+
"annotations": {
|
3 |
+
"list": [
|
4 |
+
{
|
5 |
+
"builtIn": 1,
|
6 |
+
"datasource": "-- Grafana --",
|
7 |
+
"enable": true,
|
8 |
+
"hide": true,
|
9 |
+
"iconColor": "rgba(0, 211, 255, 1)",
|
10 |
+
"name": "Annotations & Alerts",
|
11 |
+
"type": "dashboard"
|
12 |
+
}
|
13 |
+
]
|
14 |
+
},
|
15 |
+
"editable": true,
|
16 |
+
"gnetId": null,
|
17 |
+
"graphTooltip": 0,
|
18 |
+
"id": 1,
|
19 |
+
"links": [],
|
20 |
+
"panels": [
|
21 |
+
{
|
22 |
+
"title": "RAG Evaluation Results Distribution",
|
23 |
+
"type": "piechart",
|
24 |
+
"gridPos": {
|
25 |
+
"h": 8,
|
26 |
+
"w": 12,
|
27 |
+
"x": 0,
|
28 |
+
"y": 0
|
29 |
+
},
|
30 |
+
"datasource": "SQLite",
|
31 |
+
"targets": [
|
32 |
+
{
|
33 |
+
"queryType": "table",
|
34 |
+
"sql": "SELECT relevance, COUNT(*) as count FROM rag_evaluations GROUP BY relevance",
|
35 |
+
"format": "table"
|
36 |
+
}
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"title": "Search Performance Metrics",
|
41 |
+
"type": "gauge",
|
42 |
+
"gridPos": {
|
43 |
+
"h": 8,
|
44 |
+
"w": 12,
|
45 |
+
"x": 12,
|
46 |
+
"y": 0
|
47 |
+
},
|
48 |
+
"datasource": "SQLite",
|
49 |
+
"targets": [
|
50 |
+
{
|
51 |
+
"queryType": "table",
|
52 |
+
"sql": "SELECT AVG(hit_rate) as hit_rate, AVG(mrr) as mrr FROM search_performance WHERE evaluation_date >= datetime('now', '-24 hours')",
|
53 |
+
"format": "table"
|
54 |
+
}
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"title": "Recent Evaluations",
|
59 |
+
"type": "table",
|
60 |
+
"gridPos": {
|
61 |
+
"h": 8,
|
62 |
+
"w": 24,
|
63 |
+
"x": 0,
|
64 |
+
"y": 8
|
65 |
+
},
|
66 |
+
"datasource": "SQLite",
|
67 |
+
"targets": [
|
68 |
+
{
|
69 |
+
"queryType": "table",
|
70 |
+
"sql": "SELECT re.video_id, v.title, re.question, re.relevance, re.evaluation_date FROM rag_evaluations re JOIN videos v ON re.video_id = v.youtube_id ORDER BY re.evaluation_date DESC LIMIT 10",
|
71 |
+
"format": "table"
|
72 |
+
}
|
73 |
+
]
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"title": "Ground Truth Questions per Video",
|
77 |
+
"type": "barchart",
|
78 |
+
"gridPos": {
|
79 |
+
"h": 8,
|
80 |
+
"w": 12,
|
81 |
+
"x": 0,
|
82 |
+
"y": 16
|
83 |
+
},
|
84 |
+
"datasource": "SQLite",
|
85 |
+
"targets": [
|
86 |
+
{
|
87 |
+
"queryType": "table",
|
88 |
+
"sql": "SELECT v.title, COUNT(gt.id) as question_count FROM ground_truth gt JOIN videos v ON gt.video_id = v.youtube_id GROUP BY v.youtube_id, v.title ORDER BY question_count DESC LIMIT 10",
|
89 |
+
"format": "table"
|
90 |
+
}
|
91 |
+
]
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"title": "Search Parameter Performance",
|
95 |
+
"type": "table",
|
96 |
+
"gridPos": {
|
97 |
+
"h": 8,
|
98 |
+
"w": 12,
|
99 |
+
"x": 12,
|
100 |
+
"y": 16
|
101 |
+
},
|
102 |
+
"datasource": "SQLite",
|
103 |
+
"targets": [
|
104 |
+
{
|
105 |
+
"queryType": "table",
|
106 |
+
"sql": "SELECT video_id, parameter_name, parameter_value, score, evaluation_date FROM search_parameters ORDER BY evaluation_date DESC LIMIT 10",
|
107 |
+
"format": "table"
|
108 |
+
}
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"title": "User Feedback Distribution",
|
113 |
+
"type": "piechart",
|
114 |
+
"gridPos": {
|
115 |
+
"h": 8,
|
116 |
+
"w": 12,
|
117 |
+
"x": 0,
|
118 |
+
"y": 24
|
119 |
+
},
|
120 |
+
"datasource": "SQLite",
|
121 |
+
"targets": [
|
122 |
+
{
|
123 |
+
"queryType": "table",
|
124 |
+
"sql": "SELECT feedback, COUNT(*) as count FROM user_feedback GROUP BY feedback",
|
125 |
+
"format": "table"
|
126 |
+
}
|
127 |
+
]
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"title": "Video Statistics",
|
131 |
+
"type": "stat",
|
132 |
+
"gridPos": {
|
133 |
+
"h": 8,
|
134 |
+
"w": 12,
|
135 |
+
"x": 12,
|
136 |
+
"y": 24
|
137 |
+
},
|
138 |
+
"datasource": "SQLite",
|
139 |
+
"targets": [
|
140 |
+
{
|
141 |
+
"queryType": "table",
|
142 |
+
"sql": "SELECT COUNT(*) as total_videos, SUM(view_count) as total_views, AVG(like_count) as avg_likes FROM videos",
|
143 |
+
"format": "table"
|
144 |
+
}
|
145 |
+
]
|
146 |
+
}
|
147 |
+
],
|
148 |
+
"refresh": "5s",
|
149 |
+
"schemaVersion": 27,
|
150 |
+
"style": "dark",
|
151 |
+
"tags": [],
|
152 |
+
"templating": {
|
153 |
+
"list": [
|
154 |
+
{
|
155 |
+
"name": "video_id",
|
156 |
+
"type": "query",
|
157 |
+
"datasource": "SQLite",
|
158 |
+
"query": "SELECT youtube_id, title FROM videos ORDER BY title",
|
159 |
+
"value": "All"
|
160 |
+
}
|
161 |
+
]
|
162 |
+
},
|
163 |
+
"time": {
|
164 |
+
"from": "now-24h",
|
165 |
+
"to": "now"
|
166 |
+
},
|
167 |
+
"timepicker": {},
|
168 |
+
"timezone": "",
|
169 |
+
"title": "RAG Evaluation Dashboard",
|
170 |
+
"uid": "rag_evaluation",
|
171 |
+
"version": 1
|
172 |
+
}
|
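Once the stack is up, one way to confirm this dashboard was provisioned is Grafana's search API. This is only a verification sketch, assuming the default admin credentials and the 3000:3000 port mapping from docker-compose.yaml:

curl -s -u "${GRAFANA_USERNAME:-admin}:${GRAFANA_PASSWORD:-admin}" "http://localhost:3000/api/search?query=RAG"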
grafana/provisioning/dashboards/dashboards.yaml
ADDED
@@ -0,0 +1,14 @@
apiVersion: 1

providers:
  - name: 'DashboardProvider'
    orgId: 1
    folder: ''
    folderUid: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/dashboards
      foldersFromFilesStructure: true
grafana/provisioning/dashboards/rag_evaluation.json
DELETED
@@ -1,129 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"annotations": {
|
3 |
-
"list": [
|
4 |
-
{
|
5 |
-
"builtIn": 1,
|
6 |
-
"datasource": "-- Grafana --",
|
7 |
-
"enable": true,
|
8 |
-
"hide": true,
|
9 |
-
"iconColor": "rgba(0, 211, 255, 1)",
|
10 |
-
"name": "Annotations & Alerts",
|
11 |
-
"type": "dashboard"
|
12 |
-
}
|
13 |
-
]
|
14 |
-
},
|
15 |
-
"editable": true,
|
16 |
-
"gnetId": null,
|
17 |
-
"graphTooltip": 0,
|
18 |
-
"id": 1,
|
19 |
-
"links": [],
|
20 |
-
"panels": [
|
21 |
-
{
|
22 |
-
"aliasColors": {},
|
23 |
-
"bars": false,
|
24 |
-
"dashLength": 10,
|
25 |
-
"dashes": false,
|
26 |
-
"datasource": "SQLite",
|
27 |
-
"fieldConfig": {
|
28 |
-
"defaults": {},
|
29 |
-
"overrides": []
|
30 |
-
},
|
31 |
-
"fill": 1,
|
32 |
-
"fillGradient": 0,
|
33 |
-
"gridPos": {
|
34 |
-
"h": 9,
|
35 |
-
"w": 12,
|
36 |
-
"x": 0,
|
37 |
-
"y": 0
|
38 |
-
},
|
39 |
-
"hiddenSeries": false,
|
40 |
-
"id": 2,
|
41 |
-
"legend": {
|
42 |
-
"avg": false,
|
43 |
-
"current": false,
|
44 |
-
"max": false,
|
45 |
-
"min": false,
|
46 |
-
"show": true,
|
47 |
-
"total": false,
|
48 |
-
"values": false
|
49 |
-
},
|
50 |
-
"lines": true,
|
51 |
-
"linewidth": 1,
|
52 |
-
"nullPointMode": "null",
|
53 |
-
"options": {
|
54 |
-
"alertThreshold": true
|
55 |
-
},
|
56 |
-
"percentage": false,
|
57 |
-
"pluginVersion": "7.5.7",
|
58 |
-
"pointradius": 2,
|
59 |
-
"points": false,
|
60 |
-
"renderer": "flot",
|
61 |
-
"seriesOverrides": [],
|
62 |
-
"spaceLength": 10,
|
63 |
-
"stack": false,
|
64 |
-
"steppedLine": false,
|
65 |
-
"targets": [
|
66 |
-
{
|
67 |
-
"queryType": "table",
|
68 |
-
"refId": "A",
|
69 |
-
"sql": "SELECT relevance, COUNT(*) as count FROM rag_evaluations GROUP BY relevance"
|
70 |
-
}
|
71 |
-
],
|
72 |
-
"thresholds": [],
|
73 |
-
"timeFrom": null,
|
74 |
-
"timeRegions": [],
|
75 |
-
"timeShift": null,
|
76 |
-
"title": "RAG Evaluation Results",
|
77 |
-
"tooltip": {
|
78 |
-
"shared": true,
|
79 |
-
"sort": 0,
|
80 |
-
"value_type": "individual"
|
81 |
-
},
|
82 |
-
"type": "graph",
|
83 |
-
"xaxis": {
|
84 |
-
"buckets": null,
|
85 |
-
"mode": "categories",
|
86 |
-
"name": null,
|
87 |
-
"show": true,
|
88 |
-
"values": []
|
89 |
-
},
|
90 |
-
"yaxes": [
|
91 |
-
{
|
92 |
-
"format": "short",
|
93 |
-
"label": null,
|
94 |
-
"logBase": 1,
|
95 |
-
"max": null,
|
96 |
-
"min": null,
|
97 |
-
"show": true
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"format": "short",
|
101 |
-
"label": null,
|
102 |
-
"logBase": 1,
|
103 |
-
"max": null,
|
104 |
-
"min": null,
|
105 |
-
"show": true
|
106 |
-
}
|
107 |
-
],
|
108 |
-
"yaxis": {
|
109 |
-
"align": false,
|
110 |
-
"alignLevel": null
|
111 |
-
}
|
112 |
-
}
|
113 |
-
],
|
114 |
-
"schemaVersion": 27,
|
115 |
-
"style": "dark",
|
116 |
-
"tags": [],
|
117 |
-
"templating": {
|
118 |
-
"list": []
|
119 |
-
},
|
120 |
-
"time": {
|
121 |
-
"from": "now-6h",
|
122 |
-
"to": "now"
|
123 |
-
},
|
124 |
-
"timepicker": {},
|
125 |
-
"timezone": "",
|
126 |
-
"title": "RAG Evaluation Dashboard",
|
127 |
-
"uid": "rag_evaluation",
|
128 |
-
"version": 1
|
129 |
-
}
grafana/provisioning/datasources/sqlite.yaml
CHANGED
@@ -1,7 +1,22 @@
|
|
1 |
apiVersion: 1
|
2 |
|
|
|
|
|
|
|
|
|
3 |
datasources:
|
4 |
- name: SQLite
|
5 |
-
type: sqlite
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
apiVersion: 1
|
2 |
|
3 |
+
deleteDatasources:
|
4 |
+
- name: SQLite
|
5 |
+
orgId: 1
|
6 |
+
|
7 |
datasources:
|
8 |
- name: SQLite
|
9 |
+
type: frser-sqlite-datasource
|
10 |
+
access: proxy
|
11 |
+
orgId: 1
|
12 |
+
version: 1
|
13 |
+
editable: true
|
14 |
+
isDefault: true
|
15 |
+
jsonData:
|
16 |
+
path: /app/data/sqlite.db
|
17 |
+
queryTimeout: 30
|
18 |
+
pragmas:
|
19 |
+
- name: foreign_keys
|
20 |
+
value: "ON"
|
21 |
+
- name: busy_timeout
|
22 |
+
value: 5000
|
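The datasource now points at /app/data/sqlite.db, which docker-compose mounts read-only from ./data. A quick sanity check from the host that the table queried by the dashboard exists (assuming the sqlite3 CLI is installed):

sqlite3 ./data/sqlite.db "SELECT relevance, COUNT(*) AS count FROM rag_evaluations GROUP BY relevance;"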
image-1.png
ADDED
image-10.png
ADDED
image-11.png
ADDED
image-2.png
ADDED
image-3.png
ADDED
image-4.png
ADDED
image-5.png
ADDED
image-6.png
ADDED
image-7.png
ADDED
image-8.png
ADDED
image-9.png
ADDED
image.png
ADDED
run-docker-compose.sh
CHANGED
@@ -1,19 +1,46 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
-
#
|
4 |
-
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
#
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
#!/bin/bash

# Define the path to the .env file
ENV_PATH="./.env"

# Check if the .env file exists
if [ -f "$ENV_PATH" ]; then
    # Read the .env file and set environment variables
    while IFS= read -r line || [ -n "$line" ]; do
        # Skip empty lines and comments
        if [[ $line =~ ^[[:space:]]*$ ]] || [[ $line =~ ^# ]]; then
            continue
        fi

        # Extract variable name and value
        if [[ $line =~ ^([^=]+)=(.*)$ ]]; then
            name="${BASH_REMATCH[1]}"
            value="${BASH_REMATCH[2]}"
            export "$name"="$value"
            echo "Loaded environment variable: $name"
        fi
    done < "$ENV_PATH"

    # Stop existing containers
    echo "Stopping existing containers..."
    docker-compose down

    # Rebuild the container
    echo "Rebuilding Docker containers..."
    docker-compose build --no-cache app

    # Start the services
    echo "Starting Docker services..."
    docker-compose up -d

    # Wait for services to be ready
    echo "Waiting for services to start up..."
    sleep 20

    # Run the Streamlit app
    echo "Starting Streamlit app..."
    docker-compose exec -T app sh -c "cd /app/app && streamlit run main.py"
else
    echo "Error: The .env file was not found at $ENV_PATH" >&2
    exit 1
fi
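To launch everything with this script, make it executable and run it from the repository root (standard shell usage, nothing project-specific assumed):

chmod +x run-docker-compose.sh
./run-docker-compose.sh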