abdullahalioo committed
Commit b685be0 · verified · 1 Parent(s): 2a9b961

Update app.py

Files changed (1)
  1. app.py +40 -38
app.py CHANGED
@@ -56,79 +56,81 @@ AVAILABLE_MODELS = {
     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
 }
 
-# Chat memory (in-memory)
+# In-memory chat history and locks
 chat_histories = defaultdict(list)
-MAX_HISTORY = 100  # limit memory to avoid crashes
+chat_locks = defaultdict(asyncio.Lock)
+MAX_HISTORY = 100
 
-# Generate response stream
+# Streaming AI generation
 async def generate_ai_response(chat_id: str, model: str):
     token = os.getenv("GITHUB_TOKEN")
     if not token:
+        yield "Error: GitHub token not configured"
         raise HTTPException(status_code=500, detail="GitHub token not configured")
 
-    endpoint = "https://models.github.ai/inference"
-
     if model not in AVAILABLE_MODELS:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}"
-        )
+        yield f"Error: Invalid model {model}"
+        raise HTTPException(status_code=400, detail="Invalid model")
 
-    client = AsyncOpenAI(base_url=endpoint, api_key=token)
+    client = AsyncOpenAI(
+        base_url="https://models.github.ai/inference",
+        api_key=token
+    )
 
     try:
-        stream = await asyncio.wait_for(
-            client.chat.completions.create(
-                messages=chat_histories[chat_id],
-                model=model,
-                temperature=1.0,
-                top_p=1.0,
-                stream=True
-            ),
-            timeout=60  # Prevent hangs
-        )
+        async with chat_locks[chat_id]:
+            stream = await asyncio.wait_for(
+                client.chat.completions.create(
+                    messages=chat_histories[chat_id],
+                    model=model,
+                    temperature=1.0,
+                    top_p=1.0,
+                    stream=True
+                ),
+                timeout=60
+            )
 
         async for chunk in stream:
             if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 yield content
-                chat_histories[chat_id].append({"role": "assistant", "content": content})
-                chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
+                async with chat_locks[chat_id]:
+                    chat_histories[chat_id].append({"role": "assistant", "content": content})
+                    chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
 
     except asyncio.TimeoutError:
         yield "Error: Response timed out."
-        raise HTTPException(status_code=504, detail="Model timed out.")
-    except Exception as err:
-        yield f"Error: {str(err)}"
+        raise HTTPException(status_code=504, detail="Timeout")
+    except Exception as e:
+        yield f"Error: {str(e)}"
         raise HTTPException(status_code=500, detail="AI generation failed")
 
-# Chat endpoint
+# POST /generate
 @app.post("/generate")
 async def generate_response(
     chat_id: str = Query(..., description="Unique chat ID"),
-    prompt: str = Query(..., description="User message"),
+    prompt: str = Query(..., description="User prompt"),
     model: str = Query("openai/gpt-4.1-mini", description="Model to use")
 ):
-    if not prompt:
-        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
+    if not prompt.strip():
+        raise HTTPException(status_code=400, detail="Prompt is required")
 
-    chat_histories[chat_id].append({"role": "user", "content": prompt})
-    chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
+    async with chat_locks[chat_id]:
+        chat_histories[chat_id].append({"role": "user", "content": prompt})
+        chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
 
     return StreamingResponse(
         generate_ai_response(chat_id, model),
         media_type="text/event-stream"
     )
 
-# Optional: reset chat history
+# POST /reset
 @app.post("/reset")
-async def reset_chat(chat_id: str = Query(..., description="ID of chat to reset")):
-    if chat_id in chat_histories:
+async def reset_chat(chat_id: str = Query(..., description="Chat ID to reset")):
+    async with chat_locks[chat_id]:
         chat_histories[chat_id].clear()
-        return {"message": f"Chat {chat_id} history reset."}
-    else:
-        raise HTTPException(status_code=404, detail="Chat ID not found")
+    return {"message": f"Chat history for {chat_id} cleared."}
 
-# For ASGI servers like Uvicorn
+# For ASGI hosting
 def get_app():
     return app
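
The substance of this commit is the per-chat lock: defaultdict(asyncio.Lock) lazily creates one asyncio.Lock per chat_id, so concurrent requests against the same chat serialize their history mutations while unrelated chats proceed in parallel. A minimal standalone sketch of that pattern follows; the names histories, locks, and append_message are illustrative, not taken from app.py.

# Standalone sketch of the per-chat lock pattern introduced in this commit.
# The names histories/locks/append_message are illustrative, not from app.py.
import asyncio
from collections import defaultdict

histories = defaultdict(list)
locks = defaultdict(asyncio.Lock)  # first access for a chat_id creates its Lock

async def append_message(chat_id: str, message: str) -> None:
    # Writers to the same chat_id share one lock and are serialized;
    # writers to different chat_ids do not block each other.
    async with locks[chat_id]:
        histories[chat_id].append(message)

async def main() -> None:
    await asyncio.gather(
        append_message("demo", "first"),
        append_message("demo", "second"),
        append_message("other", "hello"),
    )
    print(histories["demo"], histories["other"])

asyncio.run(main())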
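
For reference, the two endpoints touched here can be exercised end to end with a small streaming client. This is only a sketch under assumptions not stated in the commit: the app is served locally (for example via uvicorn on port 8000, which the get_app() hook supports) and the httpx package is installed; BASE_URL and the "demo" chat_id are made up for the example.

# Hypothetical client for /generate and /reset; assumes a local server
# (e.g. uvicorn app:app --port 8000) and the httpx package.
import asyncio
import httpx

BASE_URL = "http://localhost:8000"  # assumption, not part of the commit

async def main() -> None:
    async with httpx.AsyncClient(timeout=None) as client:
        # /generate streams tokens as generate_ai_response yields them.
        params = {"chat_id": "demo", "prompt": "Hello!", "model": "openai/gpt-4.1-mini"}
        async with client.stream("POST", f"{BASE_URL}/generate", params=params) as resp:
            async for text in resp.aiter_text():
                print(text, end="", flush=True)
        print()
        # /reset clears the server-side history for this chat.
        resp = await client.post(f"{BASE_URL}/reset", params={"chat_id": "demo"})
        print(resp.json())

asyncio.run(main())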