import os

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI

app = FastAPI()

# System prompt
system = '''You are DeepSeek R1, an advanced reasoning assistant. Your responses consist of two parts:

1. A <think> block — This is your internal reasoning. You think step-by-step, carefully analyzing the question, considering context, alternatives, and edge cases. This section must be at least 10 lines long and enclosed between <think> and </think>. This part is not shown to the user in real-world applications, but is visible during debugging or development.
2. The final answer — This is the polished, professional response provided after you’ve thought through the problem. It is clear, structured, and concise.
3. Always provide code inside fenced code blocks (``` ```).

Your behavior guidelines:
- Maintain a calm, analytical, and formal tone.
- Use bullet points or numbered lists when appropriate.
- Avoid casual language, emojis, or redundant filler.
- If context is missing, mention assumptions.
- Never refer to yourself as an AI or language model.
- Do not repeat the <think> part in your final answer.

Format every response exactly as follows:

<think>
[Begin detailed, line-by-line reasoning here — minimum 10 lines. Think aloud.]
</think>

[Final answer starts here — no label, just a clean professional response.]
'''

# In-memory chat history, keyed by chat session ID
chat_history = {}

# Supported models
AVAILABLE_MODELS = {
    "openai/gpt-4.1": "OpenAI GPT-4.1",
    "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
    "deepseek/DeepSeek-R1": "DeepSeek-R1",
    "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct",
    "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    # Add more as needed...
}


async def generate_ai_response(chat_id: str, prompt: str, model: str):
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")

    if model not in AVAILABLE_MODELS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS)}",
        )

    endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)

    # Retrieve or initialize message history
    messages = chat_history.get(chat_id, [])
    if not messages:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    try:
        stream = await client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True,
        )

        # Update history only if generation starts
        chat_history[chat_id] = messages

        # Stream tokens to the client while collecting the full reply
        assistant_reply = ""
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                assistant_reply += content
                yield content

        # Store the assistant's reply so follow-up turns keep their context
        messages.append({"role": "assistant", "content": assistant_reply})
    except Exception as err:
        # The streaming response has already started, so raising an HTTPException
        # here would have no effect; report the error in-band instead.
        yield f"Error: {str(err)}"


@app.post("/generate")
async def generate_response(
    chat_id: str = Query(..., description="Chat session ID"),
    prompt: str = Query(..., description="User prompt"),
    model: str = Query("openai/gpt-4.1-mini", description="Model name"),
):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    return StreamingResponse(
        generate_ai_response(chat_id, prompt, model),
        media_type="text/event-stream",
    )


@app.post("/reset")
async def reset_chat(chat_id: str = Query(..., description="Chat session ID to reset")):
    chat_history.pop(chat_id, None)
    return {"message": f"Chat history for {chat_id} has been cleared."}


def get_app():
    return app
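

# A minimal sketch of how this app could be run and exercised locally.
# The use of uvicorn and the host/port values are assumptions, not part of the
# original application; adjust them for your deployment.
#
# Example requests (hypothetical chat_id "demo"):
#   curl -N -X POST "http://localhost:8000/generate?chat_id=demo&prompt=Hello&model=openai/gpt-4.1-mini"
#   curl -X POST "http://localhost:8000/reset?chat_id=demo"
if __name__ == "__main__":
    import uvicorn

    # GITHUB_TOKEN must be set in the environment before starting the server.
    uvicorn.run(get_app(), host="0.0.0.0", port=8000)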