# flowify-backend / app.py
# NOTE(review): the lines below are Hugging Face web-page chrome captured during
# extraction; preserved as a comment so the file parses:
#   Maouu's picture — Update app.py — c3d2e83 verified — raw — history blame — 5.16 kB
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel
from typing import List, Optional, Union
import requests
import time
from fastapi.templating import Jinja2Templates
app = FastAPI()

# Open CORS policy: accept requests from any origin, with any method/header.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True for credentialed requests — confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Jinja2 template directory (not referenced by the routes visible below).
templates = Jinja2Templates(directory="templates")
class MessageContent(BaseModel):
    """One structured content part of a chat message."""

    # Part kind, e.g. "text".
    type: str
    # The text payload for this part.
    text: str
class ChatMessage(BaseModel):
    """A single conversation turn: a role plus its content."""

    # Speaker role — presumably "user" / "assistant" / "system"; verify with callers.
    role: str
    # Either a plain string or a list of structured content parts.
    content: Union[str, List[MessageContent]]
class ChatRequest(BaseModel):
    """Request payload accepted by POST /chat."""

    # Latest user input; appended to the history when the last message
    # is not already in structured form.
    message: str
    # Prior conversation history.
    messages: List[ChatMessage]
    # Upstream model identifier; defaults to Llama-4 Maverick.
    model: Optional[str] = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
# Browser session cookies for api.together.ai, replayed verbatim on each request.
# SECURITY NOTE(review): these are live session credentials committed to source
# control; they will expire and they leak the account. Move them to environment
# variables or a secrets manager.
cookies = {
    'intercom-id-evnv2y8k': 'fea4d452-f9be-42e0-93e3-1e47a3836362',
    'intercom-device-id-evnv2y8k': '2bb3e469-0159-4b6b-a33e-1aea4b51ccb1',
    '__stripe_mid': 'e0f7c1ba-56c6-44d4-ba1d-cf4611453eb43cf922',
    'state-csrf': '6f2o8nqgee2dfqdmhaxipe',
    'together_auth_cookie': '%7B%22expires%22%3A%222026-04-09T15%3A14%3A08.985Z%22%2C%22session%22%3A%220eae08c6fd1b79a22476a317d440a2104d74cd3ba333e40771b5ce50a90784297eb82eff36263debca2ee0658abe3e43cab97f87794421111d4bdec56b43dd2595ee22a165c123ba3d0f807759555b5f6d3f51b7c248e7cefcdf0f0b897f62b25b2a569e2cb89633032f15dca9818f39ed49f3ac2d7e0bc3d24517c62c78b1e4%22%7D',
    '__stripe_sid': '979e00a2-06ed-45be-9a95-88d7e7580f625ccce4',
    'intercom-session-evnv2y8k': 'TzZzSzBNRG8xdHJtTVprMm1zUXFob0M2ekhFV3VmeDZFcW5UVldlYmFYc3RsRjFmdWJidjU1ZXVSZzNOSW9QTE82OUx6anlvMWVncmlTd2ZvOERDUXN4OUdoSEM5ZzRnQmh4d2o5S3JKeDA9LS00S3JOclNpNzU0VkVBaTNRNWhSMm93PT0=--2719775e99e920753d35527a45a6731bac5e8f8f',
    'AMP_7112ee0414': 'JTdCJTIyZGV2aWNlSWQlMjIlM0ElMjJmY2ZmNjE3Ny00Yzg0LTRlOTItYTFhMC1kM2Y1ZjllOTFkYTglMjIlMkMlMjJ1c2VySWQlMjIlM0ElMjI2N2I1ZDkwNDNkZTIyN2Q0OGIzMWEwZTMlMjIlMkMlMjJzZXNzaW9uSWQlMjIlM0ExNzQ0MjExNjQyMjEwJTJDJTIyb3B0T3V0JTIyJTNBZmFsc2UlMkMlMjJsYXN0RXZlbnRUaW1lJTIyJTNBMTc0NDIxMTc1ODAwOSUyQyUyMmxhc3RFdmVudElkJTIyJTNBMjMyJTJDJTIycGFnZUNvdW50ZXIlMjIlM0E1JTdE',
}
# HTTP headers replayed verbatim to api.together.ai, mimicking the Chrome
# playground client.
# SECURITY NOTE(review): the 'authorization' Bearer token is a hard-coded
# credential committed to source control — rotate it and load from the
# environment instead.
headers = {
    'accept': 'application/json',
    'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
    'authorization': 'Bearer 4d900964e385651ea685af6f6cd5573a17b421f50657f73f903525177915a7e2',
    'content-type': 'application/json',
    'priority': 'u=1, i',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'x-stainless-arch': 'unknown',
    'x-stainless-lang': 'js',
    'x-stainless-os': 'Unknown',
    'x-stainless-package-version': '0.11.1',
    'x-stainless-retry-count': '0',
    'x-stainless-runtime': 'browser:chrome',
    'x-stainless-runtime-version': '135.0.0',
    'referer': 'https://api.together.ai/playground/v2/chat/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8',
}
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return {"status":"running"}
@app.post("/chat")
async def chat(request_data: ChatRequest):
user_input = request_data.message
messages = request_data.messages
model = request_data.model
current_messages = messages.copy()
if messages and isinstance(messages[-1].content, list):
current_messages = messages
else:
current_messages.append(ChatMessage(
role="user",
content=[MessageContent(type="text", text=user_input)]
))
json_data = {
'model': model,
'max_tokens': None,
'temperature': 0.7,
'top_p': 0.7,
'top_k': 50,
'repetition_penalty': 1,
'stream_tokens': True,
'stop': ['<|eot_id|>', '<|eom_id|>'],
'messages': [m.dict() for m in current_messages],
'stream': True,
}
def generate():
max_retries = 5
for attempt in range(max_retries):
response = requests.post(
'https://api.together.ai/inference',
cookies=cookies,
headers=headers,
json=json_data,
stream=True
)
if response.status_code == 200:
for line in response.iter_lines():
if line:
decoded_line = line.decode('utf-8')
if decoded_line.startswith("data: "):
yield f"{decoded_line}\n\n"
return
elif response.status_code == 429:
if attempt < max_retries - 1:
time.sleep(0.5)
continue
yield 'data: {"error": "Rate limited, maximum retries reached"}\n\n'
return
else:
yield f'data: {{"error": "Unexpected status code: {response.status_code}"}}\n\n'
return
yield 'data: {"error": "Maximum retries reached"}\n\n'
return StreamingResponse(generate(), media_type="text/event-stream")