import os

from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from openai import AsyncOpenAI

app = FastAPI()

# Read the GitHub token once at import time so every endpoint sees the same value.
token = os.getenv("GITHUB_TOKEN")
async def generate_ai_response(prompt: str):
    # Configuration for the unofficial GitHub AI endpoint
    endpoint = "https://models.github.ai/inference"
    model = "openai/gpt-4.1-mini"  # Unofficial model name
    client = AsyncOpenAI(base_url=endpoint, api_key=token)
    try:
        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True,
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    except Exception as err:
        # The response has already started streaming by the time an error
        # surfaces here, so raising HTTPException could not change the status
        # code; report the failure in-band instead.
        yield f"Error: {err}"
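# A minimal sketch for exercising the generator outside HTTP (a hypothetical
# helper, not one of the app's routes); assumes GITHUB_TOKEN is set in the
# environment.
async def _demo_print_response(prompt: str) -> None:
    # Drain the async generator and print each streamed fragment as it arrives.
    async for piece in generate_ai_response(prompt):
        print(piece, end="", flush=True)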
class CustomStreamingResponse(Response):
    def __init__(self, content, token, media_type="text/event-stream", status_code=200):
        # Response.__init__ expects str/bytes content, so keep the async
        # generator on body_iterator and drain it in __call__ instead.
        super().__init__(content=None, media_type=media_type, status_code=status_code)
        self.body_iterator = content
        self.token = token
    async def __call__(self, scope, receive, send):
        # Send the response headers, including the custom token header.
        await send({
            "type": "http.response.start",
            "status": self.status_code,
            "headers": [
                (b"content-type", self.media_type.encode()),
                (b"x-token-value", self.token.encode()),
            ],
        })
        # Stream each chunk from the generator as a body frame.
        async for chunk in self.body_iterator:
            await send({
                "type": "http.response.body",
                "body": chunk.encode() if isinstance(chunk, str) else chunk,
                "more_body": True,
            })
        # Close the response with an empty final frame.
        await send({
            "type": "http.response.body",
            "body": b"",
            "more_body": False,
        })
@app.post("/generate")
async def generate_response(prompt: str):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    # Fail before the stream starts; errors raised inside the generator
    # arrive after the headers have already been sent.
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    return CustomStreamingResponse(
        content=generate_ai_response(prompt),
        token=token,
        media_type="text/event-stream",
    )
@app.get("/get-token")  # Endpoint that returns the token
async def get_token():
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
    return {"token": token}
def get_app():
    return app
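# Local run helper (a sketch; assumes uvicorn is installed, the usual ASGI
# server for FastAPI, though nothing in this file requires it).
# Example request: curl -N -X POST "http://127.0.0.1:8000/generate?prompt=hello"
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)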