import os

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI

app = FastAPI()
# System prompt
system = '''You are DeepSeek R1, an advanced reasoning assistant.
Your responses consist of two parts:
1. A <think> block: your internal reasoning. Think step by step, carefully analyzing the question and considering context, alternatives, and edge cases. This section must be at least 10 lines long and enclosed between <think> and </think>. It is not shown to the user in real-world applications, but it is visible during debugging and development.
2. The final answer: the polished, professional response provided after you have thought through the problem. It is clear, structured, and concise.
3. Always provide code in this format: ```<code>```.
Your behavior guidelines:
- Maintain a calm, analytical, and formal tone.
- Use bullet points or numbered lists when appropriate.
- Avoid casual language, emojis, or redundant filler.
- If context is missing, state your assumptions.
- Never refer to yourself as an AI or language model.
- Do not repeat the <think> block in your final answer.
Format every response exactly as follows:
<think>
[Begin detailed, line-by-line reasoning here, minimum 10 lines. Think aloud.]
</think>
[Final answer starts here: no label, just a clean professional response.]
'''
# In-memory chat history
chat_history = {}

# Supported models
AVAILABLE_MODELS = {
    "openai/gpt-4.1": "OpenAI GPT-4.1",
    "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
    "deepseek/DeepSeek-R1": "DeepSeek-R1",
    "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct",
    "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    # Add more as needed...
}
async def generate_ai_response(chat_id: str, prompt: str, model: str):
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")

    if model not in AVAILABLE_MODELS:
        raise HTTPException(status_code=400, detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS)}")

    endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)

    # Retrieve or initialize message history
    messages = chat_history.get(chat_id, [])
    if not messages:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    try:
        stream = await client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )

        # Persist the history only once generation actually starts
        chat_history[chat_id] = messages

        # Accumulate the streamed reply so the assistant turn can be stored
        reply = ""
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                reply += chunk.choices[0].delta.content
                yield chunk.choices[0].delta.content

        # Record the assistant turn so follow-up prompts carry full context
        messages.append({"role": "assistant", "content": reply})
    except Exception as err:
        # The response has already started streaming, so a clean HTTP error
        # can no longer be returned; surface the failure in the stream instead.
        yield f"Error: {str(err)}"
# Streaming chat endpoint. The route path "/chat/stream" is an assumption;
# adjust it to match the rest of your application.
@app.get("/chat/stream")
async def generate_response(
    chat_id: str = Query(..., description="Chat session ID"),
    prompt: str = Query(..., description="User prompt"),
    model: str = Query("openai/gpt-4.1-mini", description="Model name")
):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    return StreamingResponse(
        generate_ai_response(chat_id, prompt, model),
        media_type="text/event-stream"
    )
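
# Example request against the streaming endpoint (the route path and port are
# the assumptions made above); -N disables curl's buffering so tokens appear
# as they are streamed:
#
#   curl -N "http://localhost:8000/chat/stream?chat_id=demo&prompt=Hello&model=openai/gpt-4.1-mini"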
# Endpoint to clear a chat session. The method and path are assumptions.
@app.post("/chat/reset")
async def reset_chat(chat_id: str = Query(..., description="Chat session ID to reset")):
    chat_history.pop(chat_id, None)
    return {"message": f"Chat history for {chat_id} has been cleared."}
def get_app():
    return app
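
# Minimal local runner; the host and port are assumptions, and uvicorn must be
# installed separately (pip install uvicorn).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)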