Maouu committed on
Commit 9cb3fae · verified · 1 Parent(s): 670871c

Create app.py

Files changed (1)
  1. app.py +182 -0
app.py ADDED
@@ -0,0 +1,182 @@
+ from fastapi import FastAPI, Request
+ from fastapi.responses import StreamingResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from typing import List, Dict, Any, Optional
+ from pydantic import BaseModel
+ import asyncio
+ import httpx
+
+ from config import cookies, headers
+ from prompts import ChiplingPrompts
+
+ app = FastAPI()
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Define request model
+ class ChatRequest(BaseModel):
+     message: str
+     messages: List[Dict[Any, Any]]
+     model: Optional[str] = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+
+ async def generate(json_data: Dict[str, Any]):
+     max_retries = 5
+     for attempt in range(max_retries):
+         async with httpx.AsyncClient(timeout=None) as client:
+             try:
+                 request_ctx = client.stream(
+                     "POST",
+                     "https://api.together.ai/inference",
+                     cookies=cookies,
+                     headers=headers,
+                     json=json_data
+                 )
+
+                 async with request_ctx as response:
+                     if response.status_code == 200:
+                         async for line in response.aiter_lines():
+                             if line:
+                                 yield f"{line}\n"
+                         return
+                     elif response.status_code == 429:
+                         if attempt < max_retries - 1:
+                             await asyncio.sleep(0.5)
+                             continue
+                         yield "data: [Rate limited, max retries]\n\n"
+                         return
+                     else:
+                         yield f"data: [Unexpected status code: {response.status_code}]\n\n"
+                         return
+             except Exception as e:
+                 yield f"data: [Connection error: {str(e)}]\n\n"
+                 return
+
+     yield "data: [Max retries reached]\n\n"
+
+ @app.get("/")
+ async def index():
+     return {"status": "ok"}
+
+ @app.post("/chat")
+ async def chat(request: ChatRequest):
+     current_messages = request.messages.copy()
+
+     # Handle both single text or list content
+     if request.messages and isinstance(request.messages[-1].get('content'), list):
+         current_messages = request.messages
+     else:
+         current_messages.append({
+             'content': [{
+                 'type': 'text',
+                 'text': request.message
+             }],
+             'role': 'user'
+         })
+
+     json_data = {
+         'model': request.model,
+         'max_tokens': None,
+         'temperature': 0.7,
+         'top_p': 0.7,
+         'top_k': 50,
+         'repetition_penalty': 1,
+         'stream_tokens': True,
+         'stop': ['<|eot_id|>', '<|eom_id|>'],
+         'messages': current_messages,
+         'stream': True,
+     }
+
+     return StreamingResponse(generate(json_data), media_type='text/event-stream')
+
+
+ @app.post("/generate-modules")
+ async def generate_modules(request: Request):
+     data = await request.json()
+     search_query = data.get("searchQuery")
+
+     if not search_query:
+         return {"error": "searchQuery is required"}
+
+     system_prompt = ChiplingPrompts.generateModules(search_query)
+
+     current_messages = [
+         {
+             'role': 'system',
+             'content': [{
+                 'type': 'text',
+                 'text': system_prompt
+             }]
+         },
+         {
+             'role': 'user',
+             'content': [{
+                 'type': 'text',
+                 'text': search_query
+             }]
+         }
+     ]
+
+     json_data = {
+         'model': "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+         'max_tokens': None,
+         'temperature': 0.7,
+         'top_p': 0.7,
+         'top_k': 50,
+         'repetition_penalty': 1,
+         'stream_tokens': True,
+         'stop': ['<|eot_id|>', '<|eom_id|>'],
+         'messages': current_messages,
+         'stream': True,
+     }
+
+     return StreamingResponse(generate(json_data), media_type='text/event-stream')
+
+
+ @app.post("/generate-topics")
+ async def generate_topics(request: Request):
+     data = await request.json()
+     search_query = data.get("searchQuery")
+
+     if not search_query:
+         return {"error": "searchQuery is required"}
+
+     system_prompt = ChiplingPrompts.generateTopics(search_query)
+
+     current_messages = [
+         {
+             'role': 'system',
+             'content': [{
+                 'type': 'text',
+                 'text': system_prompt
+             }]
+         },
+         {
+             'role': 'user',
+             'content': [{
+                 'type': 'text',
+                 'text': search_query
+             }]
+         }
+     ]
+
+     json_data = {
+         'model': "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+         'max_tokens': None,
+         'temperature': 0.7,
+         'top_p': 0.7,
+         'top_k': 50,
+         'repetition_penalty': 1,
+         'stream_tokens': True,
+         'stop': ['<|eot_id|>', '<|eom_id|>'],
+         'messages': current_messages,
+         'stream': True,
+     }
+
+     return StreamingResponse(generate(json_data), media_type='text/event-stream')
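
For reference, a minimal sketch of how a client might consume the /chat endpoint added in this commit. The base URL (http://localhost:7860) and the example payload are illustrative assumptions, not part of the commit; the endpoint streams Server-Sent-Event-style lines, so the response body is read line by line.

 # Client sketch (assumed base URL http://localhost:7860; payload values are examples only)
 import asyncio
 import httpx

 async def main():
     payload = {
         "message": "Explain what this API does in one paragraph.",
         "messages": [],  # prior conversation turns, in the same shape the endpoint expects
     }
     async with httpx.AsyncClient(timeout=None) as client:
         # /chat returns a streaming text/event-stream response
         async with client.stream("POST", "http://localhost:7860/chat", json=payload) as response:
             async for line in response.aiter_lines():
                 if line:
                     print(line)

 asyncio.run(main())

The /generate-modules and /generate-topics endpoints follow the same streaming pattern but expect a JSON body with a searchQuery field instead of message/messages.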