import os

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from openai import AsyncOpenAI

app = FastAPI()


class GenerateRequest(BaseModel):
    prompt: str
    model: str  # e.g., "deepseek/DeepSeek-V3-0324"
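
# Example request body the endpoint below expects (the prompt value is
# illustrative; the model name is the one suggested in the comment above):
#   {"prompt": "Say hello", "model": "deepseek/DeepSeek-V3-0324"}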


async def generate_ai_response(prompt: str, model: str):
    """Stream completion chunks from the GitHub Models inference endpoint."""
    token = os.getenv("GITHUB_TOKEN")
    # The endpoint could also be made configurable if needed.
    endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            model=model,  # model name comes from the request body
            temperature=1.0,
            top_p=1.0,
            stream=True,
        )
        async for chunk in stream:
            # Forward only non-empty content deltas to the client.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as err:
        # The response has already started streaming, so raising HTTPException
        # here cannot change the status code; report the failure in-band instead.
        yield f"Error: {err}"


@app.post("/generate")
async def generate_response(request: GenerateRequest):
    if not request.prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    # Validate configuration here, before the stream starts, so the client
    # still receives a proper HTTP error status instead of a broken stream.
    if not os.getenv("GITHUB_TOKEN"):
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    return StreamingResponse(
        generate_ai_response(request.prompt, request.model),
        media_type="text/event-stream",
    )


def get_app():
    return app
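

# --- Usage sketch (not part of the app) --------------------------------------
# A minimal client-side sketch for consuming the stream, assuming the app is
# served locally, e.g. `uvicorn main:app --port 8000` (module name, host, and
# port are assumptions), and using the third-party `httpx` library:
#
#   import asyncio
#   import httpx
#
#   async def main():
#       payload = {"prompt": "Say hello", "model": "deepseek/DeepSeek-V3-0324"}
#       async with httpx.AsyncClient(timeout=None) as client:
#           async with client.stream(
#               "POST", "http://localhost:8000/generate", json=payload
#           ) as resp:
#               # Print each streamed chunk as it arrives.
#               async for text in resp.aiter_text():
#                   print(text, end="", flush=True)
#
#   asyncio.run(main())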