abdullahalioo committed
Commit 0ca6f76 · verified · Parent: 984a117

Update app.py

Files changed (1): app.py (+48, −39)
app.py CHANGED
@@ -1,13 +1,13 @@
 import os
-import httpx
+import asyncio
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import StreamingResponse
+from openai import AsyncOpenAI
 from collections import defaultdict
-from typing import AsyncGenerator
 
 app = FastAPI()
 
-# Model list (unchanged)
+# Define available models
 AVAILABLE_MODELS = {
     "openai/gpt-4.1": "OpenAI GPT-4.1",
     "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
@@ -56,70 +56,79 @@ AVAILABLE_MODELS = {
     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
 }
 
-# In-memory history
+# Chat memory (in-memory)
 chat_histories = defaultdict(list)
+MAX_HISTORY = 10  # limit memory to avoid crashes
 
-# Async generator for AI response
-async def generate_ai_response(chat_id: str, model: str) -> AsyncGenerator[str, None]:
+# Generate response stream
+async def generate_ai_response(chat_id: str, model: str):
     token = os.getenv("GITHUB_TOKEN")
     if not token:
         raise HTTPException(status_code=500, detail="GitHub token not configured")
 
+    endpoint = "https://models.github.ai/inference"
+
     if model not in AVAILABLE_MODELS:
-        raise HTTPException(status_code=400, detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS.keys())}")
+        raise HTTPException(
+            status_code=400,
+            detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}"
+        )
 
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Content-Type": "application/json"
-    }
+    client = AsyncOpenAI(base_url=endpoint, api_key=token)
 
-    payload = {
-        "model": model,
-        "messages": chat_histories[chat_id],
-        "stream": True,
-        "temperature": 1.0,
-        "top_p": 1.0
-    }
+    try:
+        stream = await asyncio.wait_for(
+            client.chat.completions.create(
+                messages=chat_histories[chat_id],
+                model=model,
+                temperature=1.0,
+                top_p=1.0,
+                stream=True
+            ),
+            timeout=60  # Prevent hangs
+        )
 
-    async with httpx.AsyncClient(timeout=60.0) as client:
-        try:
-            async with client.stream("POST", "https://models.github.ai/inference", headers=headers, json=payload) as response:
-                async for line in response.aiter_lines():
-                    if line.startswith("data:"):
-                        data = line[len("data:"):].strip()
-                        if data == "[DONE]":
-                            break
-                        if data:
-                            yield f"{data}\n"
-                            # Optionally: append to chat history
-                            chat_histories[chat_id].append({"role": "assistant", "content": data})
-        except Exception as e:
-            yield f"Error: {str(e)}"
+        async for chunk in stream:
+            if chunk.choices and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                yield content
+                chat_histories[chat_id].append({"role": "assistant", "content": content})
+                chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
 
-# Generate response endpoint
+    except asyncio.TimeoutError:
+        yield "Error: Response timed out."
+        raise HTTPException(status_code=504, detail="Model timed out.")
+    except Exception as err:
+        yield f"Error: {str(err)}"
+        raise HTTPException(status_code=500, detail="AI generation failed")
+
+# Chat endpoint
 @app.post("/generate")
 async def generate_response(
-    chat_id: str = Query(..., description="Chat session ID"),
-    prompt: str = Query(..., description="User input message"),
+    chat_id: str = Query(..., description="Unique chat ID"),
+    prompt: str = Query(..., description="User message"),
    model: str = Query("openai/gpt-4.1-mini", description="Model to use")
 ):
     if not prompt:
         raise HTTPException(status_code=400, detail="Prompt cannot be empty")
-
+
     chat_histories[chat_id].append({"role": "user", "content": prompt})
+    chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
 
     return StreamingResponse(
         generate_ai_response(chat_id, model),
         media_type="text/event-stream"
     )
 
-# Reset chat history endpoint
+# Optional: reset chat history
 @app.post("/reset")
-async def reset_chat(chat_id: str = Query(...)):
+async def reset_chat(chat_id: str = Query(..., description="ID of chat to reset")):
     if chat_id in chat_histories:
         chat_histories[chat_id].clear()
         return {"message": f"Chat {chat_id} history reset."}
-    raise HTTPException(status_code=404, detail="Chat ID not found")
+    else:
+        raise HTTPException(status_code=404, detail="Chat ID not found")
 
+# For ASGI servers like Uvicorn
 def get_app():
     return app
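
For reference, a minimal client-side sketch of how the updated endpoints might be exercised. This is not part of the commit: the `httpx` dependency, the `BASE_URL`, and the function names are assumptions, and it presumes the app is served locally (e.g. `uvicorn app:app --port 8000`).

# client_example.py - hypothetical usage sketch, not part of the commit
import httpx

BASE_URL = "http://localhost:8000"  # assumed local deployment

def chat(chat_id: str, prompt: str, model: str = "openai/gpt-4.1-mini") -> None:
    # /generate takes its inputs as query parameters and streams text chunks
    params = {"chat_id": chat_id, "prompt": prompt, "model": model}
    with httpx.stream("POST", f"{BASE_URL}/generate", params=params, timeout=None) as response:
        for chunk in response.iter_text():
            print(chunk, end="", flush=True)
    print()

def reset(chat_id: str) -> None:
    # /reset clears the server-side history for the given chat ID
    r = httpx.post(f"{BASE_URL}/reset", params={"chat_id": chat_id})
    print(r.json())

if __name__ == "__main__":
    chat("demo", "Hello!")
    reset("demo")

Note the sketch reads the body as plain streamed text rather than parsing SSE frames: although the response is declared as `text/event-stream`, the new generator yields raw token text without `data:` framing, so a strict SSE parser would find no events.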