Oleh Kuznetsov committed on
Commit
9aa37ee
·
1 Parent(s): 7058ffd

feat(rec): Add vllm inference using Qwen

Browse files
Files changed (7) hide show
  1. .gitignore +2 -1
  2. Dockerfile +16 -4
  3. app.py +64 -2
  4. prompts.py +232 -0
  5. pyproject.toml +3 -0
  6. requirements.txt +413 -13
  7. uv.lock +0 -0
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  *__pycache__*
2
- .venv
 
 
1
  *__pycache__*
2
+ .venv
3
+ .env
Dockerfile CHANGED
@@ -1,26 +1,38 @@
1
- FROM python:3.12-slim-bookworm
 
 
 
 
 
 
 
 
 
 
2
 
3
  # uv
4
  COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
5
 
6
  RUN useradd -m -u 1000 user
7
 
8
-
9
  # Setup environment
10
  WORKDIR /code
11
  ADD ./requirements.txt /code/requirements.txt
12
- # RUN uv python install 3.12.8
13
  RUN uv pip install --no-cache --system -r /code/requirements.txt
14
 
15
  # Server configurations
16
  EXPOSE 7860
17
 
18
  USER user
 
 
19
  ENV HOME=/home/user \
20
  PATH=/home/user/.local/bin:$PATH
21
 
 
22
  WORKDIR $HOME/app
 
23
  ADD --chown=user ./app.py $HOME/app/app.py
24
 
25
  # Run the application
26
- CMD ["uv", "run", "app.py"]
 
1
# Base image: NVIDIA CUDA 12.8.1 runtime with cuDNN on Ubuntu 22.04.
FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04

# System packages: a Python interpreter plus the toolchain needed to build
# wheels that ship without prebuilt binaries. The apt lists are removed in
# the same layer so they do not end up in the image.
# NOTE(review): the distro `python3` package is used as-is, while
# pyproject.toml declares requires-python >= 3.12 -- confirm the interpreter
# version this image provides is the intended one.
# RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        python3 \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# uv: copy the binaries straight out of the official uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Unprivileged account (uid 1000) that the application runs as.
RUN useradd -m -u 1000 user

# Install the pinned dependencies into the system interpreter (done before
# switching to the unprivileged user).
WORKDIR /code
ADD ./requirements.txt /code/requirements.txt
RUN uv pip install --no-cache --system -r /code/requirements.txt

# Server configuration: port the application is served on.
EXPOSE 7860

USER user

# Environment variables for the unprivileged user.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Application directory and sources, owned by the unprivileged user.
WORKDIR $HOME/app
ADD --chown=user ./prompts.py $HOME/app/prompts.py
ADD --chown=user ./app.py $HOME/app/app.py

# Run the application.
CMD ["uv", "run", "--no-cache", "app.py"]
app.py CHANGED
@@ -1,10 +1,72 @@
1
- import gradio as gr
2
  import random
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  # Dummy model functions for demonstration
6
  def recommend_sadaimrec(query: str):
7
- return f"SADAIMREC: response to '{query}'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def recommend_chatgpt(query: str):
 
1
+ import os
2
  import random
3
+ import json
4
+
5
+ import gradio as gr
6
+ from pydantic import BaseModel
7
+ from vllm import LLM, SamplingParams
8
+ from vllm.sampling_params import GuidedDecodingParams
9
+
10
+ from prompts import qp_vllm_user_template
11
+
12
# Runtime configuration, read once from the environment at import time.
VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")

# os.getenv() returns None for an unset variable, and float(None) / int(None)
# raise an opaque TypeError. Unset or empty values therefore fall back to
# defaults instead of crashing at import.
# NOTE(review): 0.9 is meant to mirror vLLM's own documented default for
# gpu_memory_utilization -- confirm against the pinned vLLM version.
VLLM_GPU_MEMORY_UTILIZATION = float(
    os.getenv("VLLM_GPU_MEMORY_UTILIZATION") or 0.9
)

# None is forwarded as max_model_len when no limit is configured.
# NOTE(review): vLLM is expected to derive the length from the model config
# in that case -- confirm against the pinned vLLM version.
_max_seq_len_raw = os.getenv("VLLM_MAX_SEQ_LEN")
VLLM_MAX_SEQ_LEN = int(_max_seq_len_raw) if _max_seq_len_raw else None

HF_TOKEN = os.getenv("HF_TOKEN")
16
+
17
+ # -------------------------------- Data Models -------------------------------
18
class StructuredQueryRewriteResponse(BaseModel):
    """Per-category rewrites of a search query.

    Each field accepts a string or ``None`` and has no default, so all five
    keys must be present when the model is validated. The field names match
    the JSON keys shown in the prompt's reference output format.
    """

    # Rewrite combining all descriptors of the original query.
    general: str | None
    # Rewrite focused on subjective characteristics.
    subjective: str | None
    # Rewrite focused on purpose-based characteristics.
    purpose: str | None
    # Rewrite focused on technical characteristics.
    technical: str | None
    # Exploratory, curiosity-driven rewrite.
    curiosity: str | None
24
+
25
class QueryRewrite(BaseModel):
    """Query rewrites in both flat and structured form.

    Both fields are optional and default to ``None``.
    """

    # Flat list of rewrite strings.
    rewrites: list[str] | None = None
    # The same rewrites keyed by category.
    structured: StructuredQueryRewriteResponse | None = None
28
+
29
+ # -------------------------------- VLLM --------------------------------------
30
# The engine is constructed at import time from the environment-driven
# settings defined above.
local_llm = LLM(
    model=VLLM_MODEL_NAME,
    hf_token=HF_TOKEN,
    max_model_len=VLLM_MAX_SEQ_LEN,
    gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
    enforce_eager=True,
)

# Guided decoding is driven by the JSON schema of the structured response
# model, so generations are constrained to that shape.
json_schema = StructuredQueryRewriteResponse.model_json_schema()
guided_decoding_params_json = GuidedDecodingParams(json=json_schema)
sampling_params_json = SamplingParams(
    temperature=0.7,
    top_p=0.8,
    repetition_penalty=1.05,
    max_tokens=1024,
    guided_decoding=guided_decoding_params_json,
)

# System message sent ahead of every user prompt.
vllm_system_prompt = (
    "You are a search query optimization assistant built into music genre"
    " search engine, helping users discover novel music genres."
)
51
 
52
 
53
  # Dummy model functions for demonstration
54
def recommend_sadaimrec(query: str):
    """Rewrite ``query`` with the local vLLM model and return a text report.

    The query is inserted into the user prompt template, sent together with
    the system prompt to ``local_llm.chat`` using the JSON-guided sampling
    parameters, and the first completion is validated against
    ``StructuredQueryRewriteResponse``.

    Args:
        query: Free-form search query typed by the user.

    Returns:
        A string containing the ``QueryRewrite`` serialized as indented JSON,
        or a short failure message when the model output cannot be parsed
        into the expected structure.
    """
    chat_messages = [
        {"role": "system", "content": vllm_system_prompt},
        {
            "role": "user",
            "content": qp_vllm_user_template.format(query=query),
        },
    ]
    outputs = local_llm.chat(
        messages=chat_messages,
        sampling_params=sampling_params_json,
    )
    raw_text = outputs[0].outputs[0].text

    # Generation stops at max_tokens, so the JSON may be cut off mid-object.
    # Invalid JSON and schema violations both surface as pydantic's
    # ValidationError, a ValueError subclass; handle them here rather than
    # letting the exception escape into the UI callback.
    try:
        structured = StructuredQueryRewriteResponse.model_validate_json(
            raw_text
        )
    except ValueError:
        return f"SADAIMREC: could not parse a structured rewrite for '{query}'"

    rewrite = QueryRewrite(
        # Categories the model left out are None; keep only actual rewrites.
        rewrites=[
            text
            for text in structured.model_dump().values()
            if text is not None
        ],
        structured=structured,
    )
    return f"SADAIMREC: response to '{rewrite.model_dump_json(indent=4)}'"
70
 
71
 
72
  def recommend_chatgpt(query: str):
prompts.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ qp_vllm_user_template = """# Purpose and Context
2
+
3
+ Given a user-generated Search Query describing music they wish to explore, you must create a set of short, diverse, search-optimized rewrites that can be issued ALONGSIDE the original query to maximize recall while preserving precision.
4
+
5
+ # Instructions
6
+
7
+ 1. Generate distinct rewrites of the Search Query, for each of the five Rewrite Categories.
8
+ 2. Respond in JSON, adhering strictly to the Reference Output Format.
9
+
10
+ # Music Genre Descriptor Keyword Taxonomy
11
+
12
+ - **Subjective Characteristics**:
13
+ - Emotional & perceptual qualities (uplifting, melancholic, dreamy), thematic resonance
14
+ - Describe the listener's inner feeling
15
+ - **Purpose-Based Characteristics**:
16
+ - Intended context / scenario (workout, study, dinner party)
17
+ - Describes listening setting, context, suitable activities
18
+ - **Technical Characteristics**:
19
+ - Musical & production attributes (instrumentation, timbre, tempo, lo-fi)
20
+ - Describes how the sound is made
21
+
22
+ # Rewrite Categories Specifications
23
+
24
+ - **General Rewrite**:
25
+ - Core/Baseline concise, clean, descriptor-based rewrite of the original query that combines all crucial descriptors available.
26
+ - Must follow exact descriptor wording of the original
27
+ - **Subjective Rewrite**:
28
+ - Concise, clean, descriptor-based rewrite, focused solely on **Subjective Characteristics** descriptors from the original query
29
+ - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
30
+ - **Purpose Rewrite**:
31
+ - Concise, clean, descriptor-based rewrite, focused solely on **Purpose-Based Characteristics** descriptors from the original query
32
+ - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
33
+ - **Technical Rewrite**:
34
+ - Concise, clean, descriptor-based rewrite, focused solely on **Technical Characteristics** descriptors from the original query
35
+ - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
36
+ - **Curiosity-driven Rewrite**:
37
+ - Concise, clean, descriptor-based exploratory rewrite that creatively expands, reinterprets, or provides a curiosity-driven alternative perspective on the original query
38
+ - Must be grounded in original query, but be exploratory in nature, introducing novel semantic information
39
+
40
+ # Rewrite Generation Procedure
41
+
42
+ 1. **Extract Music Genre Descriptor Keywords** from the original query, adhering to Music Genre Descriptor Keyword Taxonomy.
43
+ 2. Formulate a Collection of Rewrites:
44
+ - If the Search Query does not clearly hint at a particular descriptor keyword category, OMIT rewrites focused on that category (Subjective, Purpose-based, Technical).
45
+ - ALWAYS include General and Curiosity-driven rewrites
46
+ - Avoid redundancy; ensure each rewrite provides unique value
47
+ - Maintain alignment with the user's original intent
48
+ - Clarify ambiguities and remove noise
49
+ - Rephrase negations into actionable positives (e.g., "not loud" → "quiet, gentle").
50
+ - Replace vague adjectives with precise descriptors when contextually inferable.
51
+ - Retain poetic or artistic language that strongly conveys a distinct aesthetic or emotional intent.
52
+ - Keep rewrites short, clear, expressive, and optimized for search, no more than 10-15 words.
53
+ - Preserve unique high-entropy descriptors that effectively signal specific musical genres or styles.
54
+
55
+ # Reference Output Format
56
+
57
+ ```json
58
+ {{
59
+ "general": "Concise, clean, descriptor-based rewrite of the original query that combines all crucial descriptors available.",
60
+ "subjective": "Concise, clean, descriptor-based rewrite, focused solely on **Subjective Characteristics** descriptors from the original query (if applicable).",
61
+ "purpose": "Concise, clean, descriptor-based rewrite, focused solely on **Purpose-Based Characteristics** descriptors from the original query (if applicable).",
62
+ "technical": "Concise, clean, descriptor-based rewrite, focused solely on **Technical Characteristics** descriptors from the original query (if applicable).",
63
+ "curiosity": "Concise, clean, descriptor-based exploratory rewrite that creatively expands, reinterprets, or provides a curiosity-driven alternative perspective on the original query."
64
+ }}
65
+ ```
66
+
67
+ # Examples
68
+
69
+ ## Example #1
70
+
71
+ **Search Query**: "I'm trying to find music that's like, energetic but chill at the same time? Maybe something electronic or synth-y, not too loud, but still good to listen to when studying or just relaxing? Nothing aggressive."
72
+
73
+ **Response**:
74
+
75
+ ```json
76
+ {{
77
+ "general": "Chill-energetic electronic synth tracks, gentle volume, study-relax friendly",
78
+ "subjective": "Relaxed yet lively synth-driven melodies and beats",
79
+ "purpose": "Background electro music ideal for studying and unwinding",
80
+ "technical": "Mid-tempo synth textures, smooth pads, restrained percussion",
81
+ "curiosity": "Dreamlike electronica that subtly energizes calm moments"
82
+ }}
83
+ ```
84
+
85
+ ## Example #2
86
+
87
+ **Search Query**: "What's that style where people rap but the beats are really slow and kind of dreamy, lo-fi sounding, very chilled and not flashy?"
88
+
89
+ **Response**:
90
+
91
+ ```json
92
+ {{
93
+ "general": "Slow dreamy lo-fi rap with chilled, subdued beats",
94
+ "subjective": "Chilled introspective verses with gentle vocal delivery",
95
+ "purpose": null,
96
+ "technical": "Lo-fi downtempo percussion, sparse keys, soft basslines",
97
+ "curiosity": "Ambient spoken-word rhythms floating in misty soundscapes"
98
+ }}
99
+ ```
100
+
101
+ ## Example #3
102
+
103
+ **Search Query**: "Can you suggest something that's just... really dark, intense, heavy? Something like heavy metal but slower, with deep vocals, doom-like, you know, just something grim."
104
+
105
+ **Response**:
106
+
107
+ ```json
108
+ {{
109
+ "general": "Slow heavy metal, deep vocals, dark intense doom mood",
110
+ "subjective": "Bleak and crushing riffs delivering oppressive intensity",
111
+ "purpose": null,
112
+ "technical": "Downtuned guitars, slow tempos, sustained distortion, guttural vocals",
113
+ "curiosity": "Dragged-out metallic dirges evoking existential dread"
114
+ }}
115
+ ```
116
+
117
+ ## Example #4
118
+
119
+ **Search Query**: "Relaxing piano music"
120
+
121
+ **Response**:
122
+
123
+ ```json
124
+ {{
125
+ "general": "Relaxing calming solo piano pieces with gentle melodies",
126
+ "subjective": null,
127
+ "purpose": "Quiet background piano for reflection or meditation sessions",
128
+ "technical": "Sparse, slow-tempo piano arrangements with soft dynamics",
129
+ "curiosity": "Minimalist keys exploring silence, resonance, and stillness"
130
+ }}
131
+ ```
132
+
133
+ ## Example #5
134
+
135
+ **Search Query**: "I'm throwing a dinner party—need some elegant background music. Nothing loud or distracting, just classy."
136
+
137
+ **Response**:
138
+
139
+ ```json
140
+ {{
141
+ "general": "Elegant classy background music, subtle volume for dinner",
142
+ "subjective": null,
143
+ "purpose": "Classy ambient soundtrack optimized for sophisticated gatherings",
144
+ "technical": "Soft acoustic instrumentation, low dynamics, tasteful arrangements",
145
+ "curiosity": "Delicate soundscapes adding quiet charm to refined evenings"
146
+ }}
147
+ ```
148
+
149
+ ## Example #6
150
+
151
+ **Search Query**: "Music that's just dreamy and sad, something melancholy."
152
+
153
+ **Response**:
154
+
155
+ ```json
156
+ {{
157
+ "general": "Dreamy melancholic soundscapes evoking gentle sadness",
158
+ "subjective": "Soft, sorrowful textures fostering gentle introspection",
159
+ "purpose": null,
160
+ "technical": null,
161
+ "curiosity": "Ethereal tones merging wistful melodies with subtle haze"
162
+ }}
163
+ ```
164
+
165
+ ## Example #7
166
+
167
+ **Search Query**: "Something calming, very minimalist, just quiet sounds."
168
+
169
+ **Response**:
170
+
171
+ ```json
172
+ {{
173
+ "general": "Calming minimalist quiet music built from delicate tones",
174
+ "subjective": null,
175
+ "purpose": null,
176
+ "technical": "Sparse instrumentation, extended rests, very soft volume dynamics",
177
+ "curiosity": "Experimental near-silence punctuated by graceful sonic wisps"
178
+ }}
179
+ ```
180
+
181
+ ## Example #8
182
+
183
+ **Search Query**: "Looking for loud, energetic dance tracks. Something that just makes you move."
184
+
185
+ **Response**:
186
+
187
+ ```json
188
+ {{
189
+ "general": "Loud high-energy dance tracks engineered to ignite movement",
190
+ "subjective": null,
191
+ "purpose": "Party-ready grooves perfect for vigorous dancing and cardio",
192
+ "technical": "Fast kick drums, driving basslines, dynamic drops, bright leads",
193
+ "curiosity": "Electrifying rhythms unleashing explosive, unexpected groove shifts"
194
+ }}
195
+ ```
196
+
197
+ ## Example #9
198
+
199
+ **Search Query**: "Simple guitar music, maybe acoustic, easygoing."
200
+
201
+ **Response**:
202
+
203
+ ```json
204
+ {{
205
+ "general": "Easygoing simple acoustic guitar pieces with pleasant melodies",
206
+ "subjective": null,
207
+ "purpose": null,
208
+ "technical": "Light strumming patterns, clean chords, relaxed tempo pace",
209
+ "curiosity": "Laid-back six-string sessions exploring warm harmonic simplicity"
210
+ }}
211
+ ```
212
+
213
+ ## Example #10
214
+
215
+ **Search Query**: "Epic cinematic tracks, dramatic and powerful."
216
+
217
+ **Response**:
218
+
219
+ ```json
220
+ {{
221
+ "general": "Epic cinematic tracks with dramatic, powerful orchestral intensity",
222
+ "subjective": "Grand emotional swells evoking awe and tension",
223
+ "purpose": null,
224
+ "technical": "Layered orchestration, booming percussion, soaring brass crescendos",
225
+ "curiosity": "Expansive score fragments exploring colossal orchestral textures"
226
+ }}
227
+ ```
228
+
229
+ # Search Query
230
+
231
+ {query}
232
+ """
pyproject.toml CHANGED
@@ -6,4 +6,7 @@ readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
  "gradio>=5.29.0",
 
 
 
9
  ]
 
6
  requires-python = ">=3.12"
7
  dependencies = [
8
  "gradio>=5.29.0",
9
+ "openai>=1.77.0",
10
+ "pydantic>=2.11.4",
11
+ "vllm>=0.8.5.post1",
12
  ]
requirements.txt CHANGED
@@ -2,13 +2,34 @@
2
  # uv pip compile pyproject.toml -o requirements.txt
3
  aiofiles==24.1.0
4
  # via gradio
 
 
 
 
 
 
 
 
5
  annotated-types==0.7.0
6
  # via pydantic
7
  anyio==4.9.0
8
  # via
9
  # gradio
10
  # httpx
 
11
  # starlette
 
 
 
 
 
 
 
 
 
 
 
 
12
  certifi==2025.4.26
13
  # via
14
  # httpcore
@@ -18,46 +39,144 @@ charset-normalizer==3.4.2
18
  # via requests
19
  click==8.1.8
20
  # via
 
 
21
  # typer
22
  # uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  fastapi==0.115.12
24
- # via gradio
 
 
 
 
 
 
25
  ffmpy==0.5.0
26
  # via gradio
27
  filelock==3.18.0
28
- # via huggingface-hub
 
 
 
 
 
 
 
 
 
29
  fsspec==2025.3.2
30
  # via
31
  # gradio-client
32
  # huggingface-hub
 
 
 
 
 
 
 
33
  gradio==5.29.0
34
  # via sdmrec-demo (pyproject.toml)
35
  gradio-client==1.10.0
36
  # via gradio
37
  groovy==0.1.2
38
  # via gradio
 
 
39
  h11==0.16.0
40
  # via
41
  # httpcore
42
  # uvicorn
 
 
43
  httpcore==1.0.9
44
  # via httpx
 
 
45
  httpx==0.28.1
46
  # via
 
47
  # gradio
48
  # gradio-client
 
49
  # safehttpx
50
  huggingface-hub==0.30.2
51
  # via
52
  # gradio
53
  # gradio-client
 
 
 
54
  idna==3.10
55
  # via
56
  # anyio
 
57
  # httpx
58
  # requests
 
 
 
 
 
 
 
 
 
 
59
  jinja2==3.1.6
60
- # via gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  markdown-it-py==3.0.0
62
  # via rich
63
  markupsafe==3.0.2
@@ -66,25 +185,174 @@ markupsafe==3.0.2
66
  # jinja2
67
  mdurl==0.1.2
68
  # via markdown-it-py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  numpy==2.2.5
70
  # via
 
 
71
  # gradio
 
 
 
 
72
  # pandas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  orjson==3.10.18
74
  # via gradio
 
 
 
 
75
  packaging==25.0
76
  # via
77
  # gradio
78
  # gradio-client
79
  # huggingface-hub
 
 
 
80
  pandas==2.2.3
81
  # via gradio
 
 
82
  pillow==11.2.1
83
- # via gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  pydantic==2.11.4
85
  # via
 
 
86
  # fastapi
87
  # gradio
 
 
 
 
 
 
88
  pydantic-core==2.33.2
89
  # via pydantic
90
  pydub==0.25.1
@@ -93,40 +361,141 @@ pygments==2.19.1
93
  # via rich
94
  python-dateutil==2.9.0.post0
95
  # via pandas
 
 
 
 
96
  python-multipart==0.0.20
97
- # via gradio
 
 
98
  pytz==2025.2
99
  # via pandas
100
  pyyaml==6.0.2
101
  # via
 
102
  # gradio
103
  # huggingface-hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  requests==2.32.3
105
- # via huggingface-hub
 
 
 
 
 
 
 
 
106
  rich==14.0.0
107
- # via typer
 
 
 
 
 
 
 
 
108
  ruff==0.11.8
109
  # via gradio
110
  safehttpx==0.1.6
111
  # via gradio
 
 
 
 
112
  semantic-version==2.10.0
113
  # via gradio
 
 
 
 
 
 
 
 
 
 
114
  shellingham==1.5.4
115
  # via typer
116
  six==1.17.0
117
- # via python-dateutil
 
 
118
  sniffio==1.3.1
119
- # via anyio
 
 
120
  starlette==0.46.2
121
  # via
122
  # fastapi
123
  # gradio
 
 
 
 
 
 
 
 
 
 
 
 
124
  tomlkit==0.13.2
125
  # via gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  tqdm==4.67.1
127
- # via huggingface-hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  typer==0.15.3
129
- # via gradio
 
 
130
  typing-extensions==4.13.2
131
  # via
132
  # anyio
@@ -134,10 +503,18 @@ typing-extensions==4.13.2
134
  # gradio
135
  # gradio-client
136
  # huggingface-hub
 
 
 
 
137
  # pydantic
138
  # pydantic-core
 
 
 
139
  # typer
140
  # typing-inspection
 
141
  typing-inspection==0.4.0
142
  # via pydantic
143
  tzdata==2025.2
@@ -145,6 +522,29 @@ tzdata==2025.2
145
  urllib3==2.4.0
146
  # via requests
147
  uvicorn==0.34.2
148
- # via gradio
 
 
 
 
 
 
 
 
 
 
 
149
  websockets==15.0.1
150
- # via gradio-client
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # uv pip compile pyproject.toml -o requirements.txt
3
  aiofiles==24.1.0
4
  # via gradio
5
+ aiohappyeyeballs==2.6.1
6
+ # via aiohttp
7
+ aiohttp==3.11.18
8
+ # via vllm
9
+ aiosignal==1.3.2
10
+ # via aiohttp
11
+ airportsdata==20250224
12
+ # via outlines
13
  annotated-types==0.7.0
14
  # via pydantic
15
  anyio==4.9.0
16
  # via
17
  # gradio
18
  # httpx
19
+ # openai
20
  # starlette
21
+ # watchfiles
22
+ astor==0.8.1
23
+ # via depyf
24
+ attrs==25.3.0
25
+ # via
26
+ # aiohttp
27
+ # jsonschema
28
+ # referencing
29
+ blake3==1.0.4
30
+ # via vllm
31
+ cachetools==5.5.2
32
+ # via vllm
33
  certifi==2025.4.26
34
  # via
35
  # httpcore
 
39
  # via requests
40
  click==8.1.8
41
  # via
42
+ # ray
43
+ # rich-toolkit
44
  # typer
45
  # uvicorn
46
+ cloudpickle==3.1.1
47
+ # via
48
+ # outlines
49
+ # vllm
50
+ compressed-tensors==0.9.3
51
+ # via vllm
52
+ cupy-cuda12x==13.4.1
53
+ # via ray
54
+ deprecated==1.2.18
55
+ # via
56
+ # opentelemetry-api
57
+ # opentelemetry-exporter-otlp-proto-grpc
58
+ # opentelemetry-exporter-otlp-proto-http
59
+ # opentelemetry-semantic-conventions
60
+ depyf==0.18.0
61
+ # via vllm
62
+ dill==0.4.0
63
+ # via depyf
64
+ diskcache==5.6.3
65
+ # via outlines
66
+ distro==1.9.0
67
+ # via openai
68
+ dnspython==2.7.0
69
+ # via email-validator
70
+ einops==0.8.1
71
+ # via vllm
72
+ email-validator==2.2.0
73
+ # via fastapi
74
  fastapi==0.115.12
75
+ # via
76
+ # gradio
77
+ # vllm
78
+ fastapi-cli==0.0.7
79
+ # via fastapi
80
+ fastrlock==0.8.3
81
+ # via cupy-cuda12x
82
  ffmpy==0.5.0
83
  # via gradio
84
  filelock==3.18.0
85
+ # via
86
+ # huggingface-hub
87
+ # ray
88
+ # torch
89
+ # transformers
90
+ # vllm
91
+ frozenlist==1.6.0
92
+ # via
93
+ # aiohttp
94
+ # aiosignal
95
  fsspec==2025.3.2
96
  # via
97
  # gradio-client
98
  # huggingface-hub
99
+ # torch
100
+ gguf==0.16.2
101
+ # via vllm
102
+ googleapis-common-protos==1.70.0
103
+ # via
104
+ # opentelemetry-exporter-otlp-proto-grpc
105
+ # opentelemetry-exporter-otlp-proto-http
106
  gradio==5.29.0
107
  # via sdmrec-demo (pyproject.toml)
108
  gradio-client==1.10.0
109
  # via gradio
110
  groovy==0.1.2
111
  # via gradio
112
+ grpcio==1.71.0
113
+ # via opentelemetry-exporter-otlp-proto-grpc
114
  h11==0.16.0
115
  # via
116
  # httpcore
117
  # uvicorn
118
+ hf-xet==1.1.0
119
+ # via huggingface-hub
120
  httpcore==1.0.9
121
  # via httpx
122
+ httptools==0.6.4
123
+ # via uvicorn
124
  httpx==0.28.1
125
  # via
126
+ # fastapi
127
  # gradio
128
  # gradio-client
129
+ # openai
130
  # safehttpx
131
  huggingface-hub==0.30.2
132
  # via
133
  # gradio
134
  # gradio-client
135
+ # tokenizers
136
+ # transformers
137
+ # vllm
138
  idna==3.10
139
  # via
140
  # anyio
141
+ # email-validator
142
  # httpx
143
  # requests
144
+ # yarl
145
+ importlib-metadata==8.0.0
146
+ # via
147
+ # opentelemetry-api
148
+ # vllm
149
+ interegular==0.3.3
150
+ # via
151
+ # lm-format-enforcer
152
+ # outlines
153
+ # outlines-core
154
  jinja2==3.1.6
155
+ # via
156
+ # fastapi
157
+ # gradio
158
+ # outlines
159
+ # torch
160
+ jiter==0.9.0
161
+ # via openai
162
+ jsonschema==4.23.0
163
+ # via
164
+ # mistral-common
165
+ # outlines
166
+ # outlines-core
167
+ # ray
168
+ jsonschema-specifications==2025.4.1
169
+ # via jsonschema
170
+ lark==1.2.2
171
+ # via
172
+ # outlines
173
+ # vllm
174
+ llguidance==0.7.19
175
+ # via vllm
176
+ llvmlite==0.44.0
177
+ # via numba
178
+ lm-format-enforcer==0.10.11
179
+ # via vllm
180
  markdown-it-py==3.0.0
181
  # via rich
182
  markupsafe==3.0.2
 
185
  # jinja2
186
  mdurl==0.1.2
187
  # via markdown-it-py
188
+ mistral-common==1.5.4
189
+ # via vllm
190
+ mpmath==1.3.0
191
+ # via sympy
192
+ msgpack==1.1.0
193
+ # via ray
194
+ msgspec==0.19.0
195
+ # via vllm
196
+ multidict==6.4.3
197
+ # via
198
+ # aiohttp
199
+ # yarl
200
+ nest-asyncio==1.6.0
201
+ # via outlines
202
+ networkx==3.4.2
203
+ # via torch
204
+ ninja==1.11.1.4
205
+ # via
206
+ # vllm
207
+ # xgrammar
208
+ numba==0.61.2
209
+ # via vllm
210
  numpy==2.2.5
211
  # via
212
+ # cupy-cuda12x
213
+ # gguf
214
  # gradio
215
+ # mistral-common
216
+ # numba
217
+ # opencv-python-headless
218
+ # outlines
219
  # pandas
220
+ # scipy
221
+ # torchvision
222
+ # transformers
223
+ # vllm
224
+ # xformers
225
+ nvidia-cublas-cu12==12.4.5.8
226
+ # via
227
+ # nvidia-cudnn-cu12
228
+ # nvidia-cusolver-cu12
229
+ # torch
230
+ nvidia-cuda-cupti-cu12==12.4.127
231
+ # via torch
232
+ nvidia-cuda-nvrtc-cu12==12.4.127
233
+ # via torch
234
+ nvidia-cuda-runtime-cu12==12.4.127
235
+ # via torch
236
+ nvidia-cudnn-cu12==9.1.0.70
237
+ # via torch
238
+ nvidia-cufft-cu12==11.2.1.3
239
+ # via torch
240
+ nvidia-curand-cu12==10.3.5.147
241
+ # via torch
242
+ nvidia-cusolver-cu12==11.6.1.9
243
+ # via torch
244
+ nvidia-cusparse-cu12==12.3.1.170
245
+ # via
246
+ # nvidia-cusolver-cu12
247
+ # torch
248
+ nvidia-cusparselt-cu12==0.6.2
249
+ # via torch
250
+ nvidia-nccl-cu12==2.21.5
251
+ # via torch
252
+ nvidia-nvjitlink-cu12==12.4.127
253
+ # via
254
+ # nvidia-cusolver-cu12
255
+ # nvidia-cusparse-cu12
256
+ # torch
257
+ nvidia-nvtx-cu12==12.4.127
258
+ # via torch
259
+ openai==1.77.0
260
+ # via
261
+ # sdmrec-demo (pyproject.toml)
262
+ # vllm
263
+ opencv-python-headless==4.11.0.86
264
+ # via
265
+ # mistral-common
266
+ # vllm
267
+ opentelemetry-api==1.26.0
268
+ # via
269
+ # opentelemetry-exporter-otlp-proto-grpc
270
+ # opentelemetry-exporter-otlp-proto-http
271
+ # opentelemetry-sdk
272
+ # opentelemetry-semantic-conventions
273
+ # vllm
274
+ opentelemetry-exporter-otlp==1.26.0
275
+ # via vllm
276
+ opentelemetry-exporter-otlp-proto-common==1.26.0
277
+ # via
278
+ # opentelemetry-exporter-otlp-proto-grpc
279
+ # opentelemetry-exporter-otlp-proto-http
280
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
281
+ # via opentelemetry-exporter-otlp
282
+ opentelemetry-exporter-otlp-proto-http==1.26.0
283
+ # via opentelemetry-exporter-otlp
284
+ opentelemetry-proto==1.26.0
285
+ # via
286
+ # opentelemetry-exporter-otlp-proto-common
287
+ # opentelemetry-exporter-otlp-proto-grpc
288
+ # opentelemetry-exporter-otlp-proto-http
289
+ opentelemetry-sdk==1.26.0
290
+ # via
291
+ # opentelemetry-exporter-otlp-proto-grpc
292
+ # opentelemetry-exporter-otlp-proto-http
293
+ # vllm
294
+ opentelemetry-semantic-conventions==0.47b0
295
+ # via opentelemetry-sdk
296
+ opentelemetry-semantic-conventions-ai==0.4.5
297
+ # via vllm
298
  orjson==3.10.18
299
  # via gradio
300
+ outlines==0.1.11
301
+ # via vllm
302
+ outlines-core==0.1.26
303
+ # via outlines
304
  packaging==25.0
305
  # via
306
  # gradio
307
  # gradio-client
308
  # huggingface-hub
309
+ # lm-format-enforcer
310
+ # ray
311
+ # transformers
312
  pandas==2.2.3
313
  # via gradio
314
+ partial-json-parser==0.2.1.1.post5
315
+ # via vllm
316
  pillow==11.2.1
317
+ # via
318
+ # gradio
319
+ # mistral-common
320
+ # torchvision
321
+ # vllm
322
+ prometheus-client==0.21.1
323
+ # via
324
+ # prometheus-fastapi-instrumentator
325
+ # vllm
326
+ prometheus-fastapi-instrumentator==7.1.0
327
+ # via vllm
328
+ propcache==0.3.1
329
+ # via
330
+ # aiohttp
331
+ # yarl
332
+ protobuf==4.25.7
333
+ # via
334
+ # googleapis-common-protos
335
+ # opentelemetry-proto
336
+ # ray
337
+ # vllm
338
+ psutil==7.0.0
339
+ # via vllm
340
+ py-cpuinfo==9.0.0
341
+ # via vllm
342
+ pycountry==24.6.1
343
+ # via outlines
344
  pydantic==2.11.4
345
  # via
346
+ # sdmrec-demo (pyproject.toml)
347
+ # compressed-tensors
348
  # fastapi
349
  # gradio
350
+ # lm-format-enforcer
351
+ # mistral-common
352
+ # openai
353
+ # outlines
354
+ # vllm
355
+ # xgrammar
356
  pydantic-core==2.33.2
357
  # via pydantic
358
  pydub==0.25.1
 
361
  # via rich
362
  python-dateutil==2.9.0.post0
363
  # via pandas
364
+ python-dotenv==1.1.0
365
+ # via uvicorn
366
+ python-json-logger==3.3.0
367
+ # via vllm
368
  python-multipart==0.0.20
369
+ # via
370
+ # fastapi
371
+ # gradio
372
  pytz==2025.2
373
  # via pandas
374
  pyyaml==6.0.2
375
  # via
376
+ # gguf
377
  # gradio
378
  # huggingface-hub
379
+ # lm-format-enforcer
380
+ # ray
381
+ # transformers
382
+ # uvicorn
383
+ # vllm
384
+ pyzmq==26.4.0
385
+ # via vllm
386
+ ray==2.45.0
387
+ # via vllm
388
+ referencing==0.36.2
389
+ # via
390
+ # jsonschema
391
+ # jsonschema-specifications
392
+ # outlines
393
+ regex==2024.11.6
394
+ # via
395
+ # tiktoken
396
+ # transformers
397
  requests==2.32.3
398
+ # via
399
+ # huggingface-hub
400
+ # mistral-common
401
+ # opentelemetry-exporter-otlp-proto-http
402
+ # outlines
403
+ # ray
404
+ # tiktoken
405
+ # transformers
406
+ # vllm
407
  rich==14.0.0
408
+ # via
409
+ # rich-toolkit
410
+ # typer
411
+ rich-toolkit==0.14.5
412
+ # via fastapi-cli
413
+ rpds-py==0.24.0
414
+ # via
415
+ # jsonschema
416
+ # referencing
417
  ruff==0.11.8
418
  # via gradio
419
  safehttpx==0.1.6
420
  # via gradio
421
+ safetensors==0.5.3
422
+ # via transformers
423
+ scipy==1.15.2
424
+ # via vllm
425
  semantic-version==2.10.0
426
  # via gradio
427
+ sentencepiece==0.2.0
428
+ # via
429
+ # gguf
430
+ # mistral-common
431
+ # vllm
432
+ # xgrammar
433
+ setuptools==80.3.1
434
+ # via
435
+ # torch
436
+ # vllm
437
  shellingham==1.5.4
438
  # via typer
439
  six==1.17.0
440
+ # via
441
+ # python-dateutil
442
+ # vllm
443
  sniffio==1.3.1
444
+ # via
445
+ # anyio
446
+ # openai
447
  starlette==0.46.2
448
  # via
449
  # fastapi
450
  # gradio
451
+ # prometheus-fastapi-instrumentator
452
+ sympy==1.13.1
453
+ # via torch
454
+ tiktoken==0.9.0
455
+ # via
456
+ # mistral-common
457
+ # vllm
458
+ # xgrammar
459
+ tokenizers==0.21.1
460
+ # via
461
+ # transformers
462
+ # vllm
463
  tomlkit==0.13.2
464
  # via gradio
465
+ torch==2.6.0
466
+ # via
467
+ # compressed-tensors
468
+ # outlines
469
+ # torchaudio
470
+ # torchvision
471
+ # vllm
472
+ # xformers
473
+ # xgrammar
474
+ torchaudio==2.6.0
475
+ # via vllm
476
+ torchvision==0.21.0
477
+ # via vllm
478
  tqdm==4.67.1
479
+ # via
480
+ # gguf
481
+ # huggingface-hub
482
+ # openai
483
+ # outlines
484
+ # transformers
485
+ # vllm
486
+ transformers==4.51.3
487
+ # via
488
+ # compressed-tensors
489
+ # vllm
490
+ # xgrammar
491
+ triton==3.2.0
492
+ # via
493
+ # torch
494
+ # xgrammar
495
  typer==0.15.3
496
+ # via
497
+ # fastapi-cli
498
+ # gradio
499
  typing-extensions==4.13.2
500
  # via
501
  # anyio
 
503
  # gradio
504
  # gradio-client
505
  # huggingface-hub
506
+ # mistral-common
507
+ # openai
508
+ # opentelemetry-sdk
509
+ # outlines
510
  # pydantic
511
  # pydantic-core
512
+ # referencing
513
+ # rich-toolkit
514
+ # torch
515
  # typer
516
  # typing-inspection
517
+ # vllm
518
  typing-inspection==0.4.0
519
  # via pydantic
520
  tzdata==2025.2
 
522
  urllib3==2.4.0
523
  # via requests
524
  uvicorn==0.34.2
525
+ # via
526
+ # fastapi
527
+ # fastapi-cli
528
+ # gradio
529
+ uvloop==0.21.0
530
+ # via uvicorn
531
+ vllm==0.8.5.post1
532
+ # via sdmrec-demo (pyproject.toml)
533
+ watchfiles==1.0.5
534
+ # via
535
+ # uvicorn
536
+ # vllm
537
  websockets==15.0.1
538
+ # via
539
+ # gradio-client
540
+ # uvicorn
541
+ wrapt==1.17.2
542
+ # via deprecated
543
+ xformers==0.0.29.post2
544
+ # via vllm
545
+ xgrammar==0.1.18
546
+ # via vllm
547
+ yarl==1.20.0
548
+ # via aiohttp
549
+ zipp==3.21.0
550
+ # via importlib-metadata
uv.lock CHANGED
The diff for this file is too large to render. See raw diff