""" OpenAI API integration for Norwegian RAG chatbot. Provides functions to interact with OpenAI API for both GPT-4o and embedding models. """ import os import json import time import requests from typing import Dict, List, Optional, Union, Any import openai class OpenAIAPI: """ Client for interacting with OpenAI API. Supports both text generation (GPT-4o) and embedding generation. """ def __init__( self, api_key: Optional[str] = None, model: str = "gpt-4o", embedding_model: str = "text-embedding-3-small" ): """ Initialize the OpenAI API client. Args: api_key: OpenAI API key (optional, can use OPENAI_API_KEY env var) model: GPT model to use (default: gpt-4o) embedding_model: Embedding model to use (default: text-embedding-3-small) """ self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") if not self.api_key: raise ValueError("OpenAI API key is required. Set it as OPENAI_API_KEY environment variable or pass it to the constructor.") self.client = openai.OpenAI(api_key=self.api_key) self.model = model self.embedding_model = embedding_model def generate_text( self, prompt: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9, stream: bool = False ) -> Union[str, Any]: """ Generate text using the GPT-4o model. Args: prompt: Input text prompt max_tokens: Maximum number of tokens to generate temperature: Sampling temperature top_p: Top-p sampling parameter stream: Whether to stream the response Returns: Generated text response or stream """ try: messages = [{"role": "user", "content": prompt}] response = self.client.chat.completions.create( model=self.model, messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=stream ) if stream: return response else: return response.choices[0].message.content except Exception as e: print(f"Error generating text: {str(e)}") return f"Error: {str(e)}" def generate_embeddings( self, texts: Union[str, List[str]] ) -> List[List[float]]: """ Generate embeddings for text using the embedding model. Args: texts: Single text or list of texts to embed Returns: List of embedding vectors """ # Ensure texts is a list if isinstance(texts, str): texts = [texts] try: response = self.client.embeddings.create( model=self.embedding_model, input=texts ) # Extract embeddings from response embeddings = [item.embedding for item in response.data] return embeddings except Exception as e: print(f"Error generating embeddings: {str(e)}") # Return empty embeddings as fallback return [[0.0] * 1536] * len(texts) # Example RAG prompt template for Norwegian def create_rag_prompt(query: str, context: List[str]) -> str: """ Create a RAG prompt with retrieved context for GPT-4o. Args: query: User query context: List of retrieved document chunks Returns: Formatted prompt with context """ context_text = "\n\n".join([f"Dokument {i+1}:\n{chunk}" for i, chunk in enumerate(context)]) prompt = f"""Du er en hjelpsom assistent som svarer på norsk. Bruk følgende kontekst for å svare på spørsmålet. KONTEKST: {context_text} SPØRSMÅL: {query} SVAR: """ return prompt