Maouu committed
Commit a57c7fe · verified · 1 Parent(s): 474f910

Upload 6 files

Files changed (6)
  1. app.py +142 -98
  2. config.py +32 -0
  3. dockerfile +16 -0
  4. prompts.py +58 -0
  5. requirements.txt +4 -8
  6. test.py +71 -0
app.py CHANGED
@@ -1,15 +1,17 @@
 from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import HTMLResponse, StreamingResponse
+from typing import List, Dict, Any, Optional
 from pydantic import BaseModel
-from typing import List, Optional, Union
-import requests
-import time
+import asyncio
+import httpx

+from config import cookies, headers
+from prompts import ChiplingPrompts

 app = FastAPI()

-# CORS
+# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -18,81 +20,111 @@ app.add_middleware(
     allow_headers=["*"],
 )

-
-
-class MessageContent(BaseModel):
-    type: str
-    text: str
-
-
-class ChatMessage(BaseModel):
-    role: str
-    content: Union[str, List[MessageContent]]
-
-
-class ChatRequest(BaseModel):
-    message: str
-    messages: List[ChatMessage]
-    model: Optional[str] = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
-
-
-cookies = {
-    'intercom-id-evnv2y8k': 'fea4d452-f9be-42e0-93e3-1e47a3836362',
-    'intercom-device-id-evnv2y8k': '2bb3e469-0159-4b6b-a33e-1aea4b51ccb1',
-    '__stripe_mid': 'e0f7c1ba-56c6-44d4-ba1d-cf4611453eb43cf922',
-    'state-csrf': '6f2o8nqgee2dfqdmhaxipe',
-    'together_auth_cookie': '%7B%22expires%22%3A%222026-04-09T15%3A14%3A08.985Z%22%2C%22session%22%3A%220eae08c6fd1b79a22476a317d440a2104d74cd3ba333e40771b5ce50a90784297eb82eff36263debca2ee0658abe3e43cab97f87794421111d4bdec56b43dd2595ee22a165c123ba3d0f807759555b5f6d3f51b7c248e7cefcdf0f0b897f62b25b2a569e2cb89633032f15dca9818f39ed49f3ac2d7e0bc3d24517c62c78b1e4%22%7D',
-    '__stripe_sid': '979e00a2-06ed-45be-9a95-88d7e7580f625ccce4',
-    'intercom-session-evnv2y8k': 'TzZzSzBNRG8xdHJtTVprMm1zUXFob0M2ekhFV3VmeDZFcW5UVldlYmFYc3RsRjFmdWJidjU1ZXVSZzNOSW9QTE82OUx6anlvMWVncmlTd2ZvOERDUXN4OUdoSEM5ZzRnQmh4d2o5S3JKeDA9LS00S3JOclNpNzU0VkVBaTNRNWhSMm93PT0=--2719775e99e920753d35527a45a6731bac5e8f8f',
-    'AMP_7112ee0414': 'JTdCJTIyZGV2aWNlSWQlMjIlM0ElMjJmY2ZmNjE3Ny00Yzg0LTRlOTItYTFhMC1kM2Y1ZjllOTFkYTglMjIlMkMlMjJ1c2VySWQlMjIlM0ElMjI2N2I1ZDkwNDNkZTIyN2Q0OGIzMWEwZTMlMjIlMkMlMjJzZXNzaW9uSWQlMjIlM0ExNzQ0MjExNjQyMjEwJTJDJTIyb3B0T3V0JTIyJTNBZmFsc2UlMkMlMjJsYXN0RXZlbnRUaW1lJTIyJTNBMTc0NDIxMTc1ODAwOSUyQyUyMmxhc3RFdmVudElkJTIyJTNBMjMyJTJDJTIycGFnZUNvdW50ZXIlMjIlM0E1JTdE',
-}
-
-headers = {
-    'accept': 'application/json',
-    'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
-    'authorization': 'Bearer 4d900964e385651ea685af6f6cd5573a17b421f50657f73f903525177915a7e2',
-    'content-type': 'application/json',
-    'priority': 'u=1, i',
-    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': '"macOS"',
-    'sec-fetch-dest': 'empty',
-    'sec-fetch-mode': 'cors',
-    'sec-fetch-site': 'same-origin',
-    'x-stainless-arch': 'unknown',
-    'x-stainless-lang': 'js',
-    'x-stainless-os': 'Unknown',
-    'x-stainless-package-version': '0.11.1',
-    'x-stainless-retry-count': '0',
-    'x-stainless-runtime': 'browser:chrome',
-    'x-stainless-runtime-version': '135.0.0',
-    'referer': 'https://api.together.ai/playground/v2/chat/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8',
-}
-
-
-@app.get("/", response_class=HTMLResponse)
-async def index(request: Request):
-    return {"status":"running"}
-
-
-@app.post("/chat")
-async def chat(request_data: ChatRequest):
-    user_input = request_data.message
-    messages = request_data.messages
-    model = request_data.model
-
-    current_messages = messages.copy()
-
-    if messages and isinstance(messages[-1].content, list):
-        current_messages = messages
-    else:
-        current_messages.append(ChatMessage(
-            role="user",
-            content=[MessageContent(type="text", text=user_input)]
-        ))
-
+# Define request model
+class ChatRequest(BaseModel):
+    message: str
+    messages: List[Dict[Any, Any]]
+    model: Optional[str] = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+
+async def generate(json_data: Dict[str, Any]):
+    max_retries = 5
+    for attempt in range(max_retries):
+        async with httpx.AsyncClient(timeout=None) as client:
+            try:
+                request_ctx = client.stream(
+                    "POST",
+                    "https://api.together.ai/inference",
+                    cookies=cookies,
+                    headers=headers,
+                    json=json_data
+                )
+
+                async with request_ctx as response:
+                    if response.status_code == 200:
+                        async for line in response.aiter_lines():
+                            if line:
+                                yield f"{line}\n"
+                        return
+                    elif response.status_code == 429:
+                        if attempt < max_retries - 1:
+                            await asyncio.sleep(0.5)
+                            continue
+                        yield "data: [Rate limited, max retries]\n\n"
+                        return
+                    else:
+                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
+                        return
+            except Exception as e:
+                yield f"data: [Connection error: {str(e)}]\n\n"
+                return
+
+    yield "data: [Max retries reached]\n\n"
+
+@app.get("/")
+async def index():
+    return {"status": "ok"}
+
+@app.post("/chat")
+async def chat(request: ChatRequest):
+    current_messages = request.messages.copy()
+
+    # Handle both single text or list content
+    if request.messages and isinstance(request.messages[-1].get('content'), list):
+        current_messages = request.messages
+    else:
+        current_messages.append({
+            'content': [{
+                'type': 'text',
+                'text': request.message
+            }],
+            'role': 'user'
+        })
+
+    json_data = {
+        'model': request.model,
+        'max_tokens': None,
+        'temperature': 0.7,
+        'top_p': 0.7,
+        'top_k': 50,
+        'repetition_penalty': 1,
+        'stream_tokens': True,
+        'stop': ['<|eot_id|>', '<|eom_id|>'],
+        'messages': current_messages,
+        'stream': True,
+    }
+
+    return StreamingResponse(generate(json_data), media_type='text/event-stream')
+
+
+@app.post("/generate-modules")
+async def generate_modules(request: Request):
+    data = await request.json()
+    search_query = data.get("searchQuery")
+
+    if not search_query:
+        return {"error": "searchQuery is required"}
+
+    system_prompt = ChiplingPrompts.generateModules(search_query)
+
+    current_messages = [
+        {
+            'role': 'system',
+            'content': [{
+                'type': 'text',
+                'text': system_prompt
+            }]
+        },
+        {
+            'role': 'user',
+            'content': [{
+                'type': 'text',
+                'text': search_query
+            }]
+        }
+    ]
+
     json_data = {
-        'model': model,
+        'model': "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
         'max_tokens': None,
         'temperature': 0.7,
         'top_p': 0.7,
@@ -100,39 +132,51 @@ async def chat(request_data: ChatRequest):
         'repetition_penalty': 1,
         'stream_tokens': True,
         'stop': ['<|eot_id|>', '<|eom_id|>'],
-        'messages': [m.dict() for m in current_messages],
+        'messages': current_messages,
         'stream': True,
     }

-    def generate():
-        max_retries = 5
-
-        for attempt in range(max_retries):
-            response = requests.post(
-                'https://api.together.ai/inference',
-                cookies=cookies,
-                headers=headers,
-                json=json_data,
-                stream=True
-            )
-
-            if response.status_code == 200:
-                for line in response.iter_lines():
-                    if line:
-                        decoded_line = line.decode('utf-8')
-                        if decoded_line.startswith("data: "):
-                            yield f"{decoded_line}\n\n"
-                return
-            elif response.status_code == 429:
-                if attempt < max_retries - 1:
-                    time.sleep(0.5)
-                    continue
-                yield 'data: {"error": "Rate limited, maximum retries reached"}\n\n'
-                return
-            else:
-                yield f'data: {{"error": "Unexpected status code: {response.status_code}"}}\n\n'
-                return
-
-        yield 'data: {"error": "Maximum retries reached"}\n\n'
-
-    return StreamingResponse(generate(), media_type="text/event-stream")
+    return StreamingResponse(generate(json_data), media_type='text/event-stream')
+
+
+@app.post("/generate-topics")
+async def generate_topics(request: Request):
+    data = await request.json()
+    search_query = data.get("searchQuery")
+
+    if not search_query:
+        return {"error": "searchQuery is required"}
+
+    system_prompt = ChiplingPrompts.generateTopics(search_query)
+
+    current_messages = [
+        {
+            'role': 'system',
+            'content': [{
+                'type': 'text',
+                'text': system_prompt
+            }]
+        },
+        {
+            'role': 'user',
+            'content': [{
+                'type': 'text',
+                'text': search_query
+            }]
+        }
+    ]
+
+    json_data = {
+        'model': "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        'max_tokens': None,
+        'temperature': 0.7,
+        'top_p': 0.7,
+        'top_k': 50,
+        'repetition_penalty': 1,
+        'stream_tokens': True,
+        'stop': ['<|eot_id|>', '<|eom_id|>'],
+        'messages': current_messages,
+        'stream': True,
+    }
+
+    return StreamingResponse(generate(json_data), media_type='text/event-stream')
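
For reference, a minimal client sketch for the new streaming endpoints — not part of the commit — assuming the server runs locally on port 8000 as in test.py; the payload fields mirror the ChatRequest model above:

import asyncio
import httpx

async def main():
    # Fields mirror app.py's ChatRequest model
    payload = {
        "message": "Hello, how are you?",
        "messages": [],
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    }
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", "http://localhost:8000/chat", json=payload) as response:
            # /chat relays the upstream SSE lines as-is
            async for line in response.aiter_lines():
                if line:
                    print(line)

asyncio.run(main())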
config.py CHANGED
@@ -0,0 +1,32 @@
+cookies = {
+    'intercom-id-evnv2y8k': 'fea4d452-f9be-42e0-93e3-1e47a3836362',
+    'intercom-device-id-evnv2y8k': '2bb3e469-0159-4b6b-a33e-1aea4b51ccb1',
+    '__stripe_mid': 'e0f7c1ba-56c6-44d4-ba1d-cf4611453eb43cf922',
+    'state-csrf': '6f2o8nqgee2dfqdmhaxipe',
+    'together_auth_cookie': '%7B%22expires%22%3A%222026-04-09T15%3A14%3A08.985Z%22%2C%22session%22%3A%220eae08c6fd1b79a22476a317d440a2104d74cd3ba333e40771b5ce50a90784297eb82eff36263debca2ee0658abe3e43cab97f87794421111d4bdec56b43dd2595ee22a165c123ba3d0f807759555b5f6d3f51b7c248e7cefcdf0f0b897f62b25b2a569e2cb89633032f15dca9818f39ed49f3ac2d7e0bc3d24517c62c78b1e4%22%7D',
+    '__stripe_sid': '979e00a2-06ed-45be-9a95-88d7e7580f625ccce4',
+    'intercom-session-evnv2y8k': 'TzZzSzBNRG8xdHJtTVprMm1zUXFob0M2ekhFV3VmeDZFcW5UVldlYmFYc3RsRjFmdWJidjU1ZXVSZzNOSW9QTE82OUx6anlvMWVncmlTd2ZvOERDUXN4OUdoSEM5ZzRnQmh4d2o5S3JKeDA9LS00S3JOclNpNzU0VkVBaTNRNWhSMm93PT0=--2719775e99e920753d35527a45a6731bac5e8f8f',
+    'AMP_7112ee0414': 'JTdCJTIyZGV2aWNlSWQlMjIlM0ElMjJmY2ZmNjE3Ny00Yzg0LTRlOTItYTFhMC1kM2Y1ZjllOTFkYTglMjIlMkMlMjJ1c2VySWQlMjIlM0ElMjI2N2I1ZDkwNDNkZTIyN2Q0OGIzMWEwZTMlMjIlMkMlMjJzZXNzaW9uSWQlMjIlM0ExNzQ0MjExNjQyMjEwJTJDJTIyb3B0T3V0JTIyJTNBZmFsc2UlMkMlMjJsYXN0RXZlbnRUaW1lJTIyJTNBMTc0NDIxMTc1ODAwOSUyQyUyMmxhc3RFdmVudElkJTIyJTNBMjMyJTJDJTIycGFnZUNvdW50ZXIlMjIlM0E1JTdE',
+}
+
+headers = {
+    'accept': 'application/json',
+    'accept-language': 'en-US,en;q=0.9,ja;q=0.8',
+    'authorization': 'Bearer 4d900964e385651ea685af6f6cd5573a17b421f50657f73f903525177915a7e2',
+    'content-type': 'application/json',
+    'priority': 'u=1, i',
+    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': '"macOS"',
+    'sec-fetch-dest': 'empty',
+    'sec-fetch-mode': 'cors',
+    'sec-fetch-site': 'same-origin',
+    'x-stainless-arch': 'unknown',
+    'x-stainless-lang': 'js',
+    'x-stainless-os': 'Unknown',
+    'x-stainless-package-version': '0.11.1',
+    'x-stainless-retry-count': '0',
+    'x-stainless-runtime': 'browser:chrome',
+    'x-stainless-runtime-version': '135.0.0',
+    'referer': 'https://api.together.ai/playground/v2/chat/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8',
+}
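
Note that config.py commits a live bearer token and session cookies. A safer variant — purely illustrative, using a hypothetical environment-variable name that this commit does not define — would read the secret from the environment:

import os

# TOGETHER_BEARER_TOKEN is an illustrative name, not defined by this commit
headers = {
    'accept': 'application/json',
    'content-type': 'application/json',
    'authorization': f"Bearer {os.environ['TOGETHER_BEARER_TOKEN']}",
}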
dockerfile ADDED
@@ -0,0 +1,16 @@
+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
prompts.py ADDED
@@ -0,0 +1,62 @@
+class ChiplingPrompts:
+
+    @staticmethod
+    def generateModules(searchQuery):
+        prompt = '''
+        Generate a structured learning path for the topic: "${searchQuery}".
+
+        Please provide all the modules (chapters) needed to cover the topic in full, with each module having:
+        - A title
+        - All the topics specifically related to that module
+        - More than 4 topics per module
+
+        For each topic, include:
+        - A title
+        - A relevance score (1-10)
+        - A short description (2-3 sentences)
+
+        Format the response as JSON that matches this TypeScript interface:
+        {
+          modules: Array<{
+            title: string;
+            topics: Array<{
+              title: string;
+              relevance: number;
+              description: string;
+            }>
+          }>
+        }
+
+        Each module should build on the previous one, progressively increasing in complexity or depth.
+        Only respond with the JSON data.
+        '''
+        # The template uses a JS-style ${searchQuery} placeholder;
+        # substitute the actual query before returning.
+        return prompt.replace('${searchQuery}', searchQuery)
+
+    @staticmethod
+    def generateTopics(searchQuery):
+        prompt = '''
+        Generate detailed information about the topic: "${searchQuery}".
+        Please include:
+        - A comprehensive content section (3-4 paragraphs)
+        - 2-3 subtopics, each with title, description, and content
+        - 3-5 references or further reading suggestions
+
+        Format the response as JSON that matches this TypeScript interface:
+        {
+          title: string;
+          relevance: number;
+          description: string;
+          content: string;
+          subtopics: Array<{
+            title: string;
+            description: string;
+            content: string;
+          }>;
+          references: string[];
+        }
+
+        Only respond with the JSON data.
+        '''
+        return prompt.replace('${searchQuery}', searchQuery)
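
A quick usage sketch for the prompt helpers — assuming the .replace-based substitution shown above — matching how app.py calls them on the class:

from prompts import ChiplingPrompts

prompt = ChiplingPrompts.generateModules("graph neural networks")
# After substitution the JS-style placeholder should be gone
assert "${searchQuery}" not in prompt
print(prompt[:120])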
requirements.txt CHANGED
@@ -1,9 +1,5 @@
-beautifulsoup4==4.13.3
-curl_cffi==0.7.4
 fastapi==0.115.12
-instabot==0.117.0
-praw==7.8.1
-Pyrebase4==4.8.0
-tenacity==9.0.0
-tweepy==4.14.0
-uvicorn
+httpx==0.28.1
+pydantic==2.11.3
+Requests==2.32.3
+uvicorn[standard]
test.py ADDED
@@ -0,0 +1,71 @@
+import requests
+import json
+
+# url = "http://localhost:8000/chat"  # Change to your server address if different
+
+# payload = {
+#     "message": "Hello, how are you?",
+#     "messages": [
+#         {
+#             "role": "system",
+#             "content": [
+#                 {
+#                     "type": "text",
+#                     "text": "You are a helpful assistant."
+#                 }
+#             ]
+#         }
+#     ],
+#     "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+# }
+
+# headers = {
+#     "Content-Type": "application/json"
+# }
+
+# response = requests.post(url, data=json.dumps(payload), headers=headers, stream=True)
+
+# if response.status_code == 200:
+#     print("Streaming response:\n")
+#     try:
+#         for line in response.iter_lines(decode_unicode=True):
+#             if line:
+#                 print(line)
+#     except KeyboardInterrupt:
+#         print("\nStopped streaming.")
+# else:
+#     print("Error:", response.status_code)
+#     print(response.text)
+
+
+url = "http://localhost:8000/generate-topics"
+payload = {
+    "searchQuery": "Introduction to linear integrated circuits"
+}
+headers = {
+    "Content-Type": "application/json"
+}
+
+response = requests.post(url, data=json.dumps(payload), headers=headers, stream=True)
+
+if response.status_code == 200:
+    print("Streaming response:\n")
+    try:
+        for line in response.iter_lines(decode_unicode=True):
+            if line:
+                if line.startswith("data: "):
+                    # Parse the JSON payload after the "data: " prefix
+                    try:
+                        json_data = json.loads(line[6:])  # Skip "data: " prefix
+                        if "choices" in json_data and json_data["choices"]:
+                            if "text" in json_data["choices"][0]:
+                                print(json_data["choices"][0]["text"], end="", flush=True)
+                    except json.JSONDecodeError:
+                        # Handle the special [DONE] sentinel
+                        if line[6:] == "[DONE]":
+                            break
+    except KeyboardInterrupt:
+        print("\nStopped streaming.")
+else:
+    print("Error:", response.status_code)
+    print(response.text)