import os
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI
app = FastAPI()
# System prompt
system = '''You are DeepSeek R1, an advanced reasoning assistant.
Your responses consist of two parts:
1. A <think> block — This is your internal reasoning. You think step by step, carefully analyzing the question, considering context, alternatives, and edge cases. This section must be at least 10 lines long and enclosed between <think> and </think>. This part is not shown to the user in real-world applications, but is visible during debugging or development.
2. The final answer — This is the polished, professional response provided after you’ve thought through the problem. It is clear, structured, and concise.
3. Always provide code in this format: ```<code>```.
Your behavior guidelines:
- Maintain a calm, analytical, and formal tone.
- Use bullet points or numbered lists when appropriate.
- Avoid casual language, emojis, or redundant filler.
- If context is missing, mention assumptions.
- Never refer to yourself as an AI or language model.
- Do not repeat the <think> part in your final answer.
Format every response exactly as follows:
<think>
[Begin detailed, line-by-line reasoning here — minimum 10 lines. Think aloud.]
</think>
[Final answer starts here — no label, just a clean professional response.]
'''
# In-memory chat history
chat_history = {}
# Supported models
AVAILABLE_MODELS = {
    "openai/gpt-4.1": "OpenAI GPT-4.1",
    "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
    "deepseek/DeepSeek-R1": "DeepSeek-R1",
    "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct",
    "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    # Add more as needed...
}
async def generate_ai_response(chat_id: str, prompt: str, model: str):
    """Stream model output for the given chat session, keeping per-chat history."""
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    if model not in AVAILABLE_MODELS:
        raise HTTPException(status_code=400, detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS)}")
    endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)

    # Retrieve or initialize message history
    messages = chat_history.get(chat_id, [])
    if not messages:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    try:
        stream = await client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )
        # Update history only if generation starts
        chat_history[chat_id] = messages

        assistant_reply = ""
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                delta = chunk.choices[0].delta.content
                assistant_reply += delta
                yield delta
        # Persist the assistant's reply so follow-up turns carry the full conversation
        messages.append({"role": "assistant", "content": assistant_reply})
    except Exception as err:
        # The stream has already started, so raising HTTPException here could not
        # change the response status; surface the error in-band instead.
        yield f"Error: {str(err)}"
@app.post("/generate")
async def generate_response(
    chat_id: str = Query(..., description="Chat session ID"),
    prompt: str = Query(..., description="User prompt"),
    model: str = Query("openai/gpt-4.1-mini", description="Model name")
):
    """Stream the model's reply to the given prompt for a chat session."""
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    return StreamingResponse(
        generate_ai_response(chat_id, prompt, model),
        media_type="text/event-stream"
    )
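
# Example request (hypothetical local call, assuming the server listens on port 8000):
#   curl -N -X POST "http://localhost:8000/generate?chat_id=demo&prompt=Hello&model=openai/gpt-4.1-mini"
# The -N flag disables curl's output buffering so streamed chunks appear as they arrive.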
@app.post("/reset")
async def reset_chat(chat_id: str = Query(..., description="Chat session ID to reset")):
    """Clear the stored history for a chat session."""
    chat_history.pop(chat_id, None)
    return {"message": f"Chat history for {chat_id} has been cleared."}
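
# Example request (hypothetical): curl -X POST "http://localhost:8000/reset?chat_id=demo"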
def get_app():
    return app
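
# Minimal local-run sketch (assumptions: uvicorn is installed and GITHUB_TOKEN is set
# in the environment); the hosting platform may launch the app differently.
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app on all interfaces, port 8000 (hypothetical defaults)
    uvicorn.run(app, host="0.0.0.0", port=8000)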