from openai import OpenAI
import re
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def fix_latex(text):
    # Normalize LaTeX delimiters: \( \) -> $ ... $ and \[ \] -> $$ ... $$
    text = re.sub(r"\\\(", r"$", text)
    text = re.sub(r"\\\)", r"$", text)
    text = re.sub(r"\\\[", r"$$", text)
    text = re.sub(r"\\\]", r"$$", text)
    return text


# Step 1: Domain-Specific Answer Generation
def openai_domain_specific_answer_generation(subject, question, model="gpt-4o-mini",
                                             temperature=0.3, top_p=0.1):
    system_prompt = f"""
    You are a highly specialized assistant for the subject {subject}. Provide a direct and
    focused answer to the following question based on your specialized training.
    """

    prompt = f"""
    Question: {question}

    Answer (provide a precise, domain-specific response):
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,  # Set low for deterministic and precise responses.
        top_p=top_p,              # Focus on high-probability outputs to ensure accuracy.
        frequency_penalty=0.1,    # Reduce repetition of technical terms.
        presence_penalty=0.0,     # Prevent introduction of unrelated ideas.
    )
    return fix_latex(response.choices[0].message.content)


# Step 2: Context Integration
def openai_context_integration(subject_matter, query, expert_answer, retrieved_context,
                               model="gpt-4o-mini", temperature=0.3, top_p=0.3):
    system_prompt = f"""
    You are an AI teaching assistant for a {subject_matter} course. Your task is to answer
    questions based EXCLUSIVELY on the content provided from the professor's teaching materials.
    Do NOT use any external knowledge or information not present in the given context.

    IMPORTANT: Before proceeding, carefully analyze the provided context and the question. If the
    context lacks sufficient information to answer the question adequately, respond EXACTLY as
    follows and then STOP:

    "NOT_ENOUGH_INFO The provided context doesn't contain enough information to fully answer this
    question. You may want to increase the number of relevant context passages or adjust the
    options and try again."

    If the context is sufficient, continue with the remaining guidelines.

    Guidelines:
    1. Strictly adhere to the information in the context. Do not make assumptions or use general
       knowledge outside of the provided materials.
    2. For partial answers:
       a) Provide the information you can based on the context.
       b) Clearly identify which aspects of the question you cannot address due to limited context.
    3. Referencing:
       a) Always cite your sources by referencing the video number and the given time in brackets
          and **bold** (e.g., [**Video 3, time 03:14**]) after each piece of information you use
          in your answer.
       b) You may cite multiple references if they discuss the same content
          (e.g., [**Video 3, time 03:14; Video 1, time 12:04**]). However, try to reference them
          separately if they cover different aspects of the answer.
    4. Length of response:
       a) Use approximately 120-200 tokens for each video referenced.
       b) If referencing multiple videos that discuss the same content, you can use a combined
          total of 120-200 tokens for all references.
    5. Style and Formatting:
       a) Provide the answer in markdown format.
       b) Do not use any titles, sections, or subsections. Use mainly paragraphs. Bold text,
          items, and bullet points if it helps.
       c) Symbols and equations within the text MUST be placed between $ and $, e.g., $x=0$ is
          the min of $\\sigma(x)=x^2$.
       d) For equations between paragraphs, use \n\n$ and $\n\n. For example, in the following
          equation: \n\n$ E = mc^2 $\n\n, note $c$ as the speed of light.
    6. If multiple interpretations of the question are possible based on the context, acknowledge
       this and provide answers for each interpretation.
    7. Use technical language appropriate for a {subject_matter} course, but be prepared to
       explain complex terms if asked.
    8. If the question involves calculations, show your work step-by-step, citing the relevant
       formulas or methods from the context.
    """

    prompt = f"""
    Question: {query}

    Direct Answer: {expert_answer}

    Retrieved Context: {retrieved_context}

    Final Answer:
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,  # Maintain some flexibility for smooth blending.
        top_p=top_p,              # Prioritize high-probability outputs to stay focused on the inputs.
        frequency_penalty=0.1,    # Mild penalty; still permits repetition needed for clarity.
        presence_penalty=0.0,     # Neutral to avoid introducing unrelated ideas.
    )
    return fix_latex(response.choices[0].message.content)
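# Example usage: chain Step 1 and Step 2. This is a minimal sketch, not part of
# the pipeline above; the subject, question, and retrieved context are
# hypothetical placeholders. In practice, `retrieved_context` would come from a
# retriever over the course's video transcripts.
if __name__ == "__main__":
    subject = "Signals and Systems"  # hypothetical course name
    question = "Why does the Fourier transform of a rectangular pulse produce a sinc function?"
    retrieved_context = "Video 2, time 05:30: ..."  # placeholder retrieval output

    # Step 1: get a focused answer from the model's domain knowledge.
    expert_answer = openai_domain_specific_answer_generation(subject, question)

    # Step 2: ground that answer in the retrieved course materials.
    final_answer = openai_context_integration(subject, question, expert_answer, retrieved_context)

    # The Step 2 system prompt instructs the model to emit a fixed sentinel when
    # the context is insufficient, so callers can branch on it.
    if final_answer.startswith("NOT_ENOUGH_INFO"):
        print("Context was insufficient; retrieve more passages and retry.")
    else:
        print(final_answer)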