Spaces:

sadaisystems
/

sdmrec-docker

Paused

App Files Community

Oleh Kuznetsov committed 8 days ago

Commit

9aa37ee

1 Parent(s): 7058ffd

feat(rec): Add vllm inference using Qwen

Browse files

Files changed (7) hide show

.gitignore +2 -1
Dockerfile +16 -4
app.py +64 -2
prompts.py +232 -0
pyproject.toml +3 -0
requirements.txt +413 -13
uv.lock +0 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 *__pycache__*
-.venv

 *__pycache__*
+.venv
+.env

Dockerfile CHANGED Viewed

@@ -1,26 +1,38 @@
-FROM python:3.12-slim-bookworm
 # uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 RUN useradd -m -u 1000 user
 # Setup environment
 WORKDIR /code
 ADD ./requirements.txt /code/requirements.txt
-# RUN uv python install 3.12.8
 RUN uv pip install --no-cache --system -r /code/requirements.txt
 # Server configurations
 EXPOSE 7860
 USER user
 ENV HOME=/home/user \
 PATH=/home/user/.local/bin:$PATH
 WORKDIR $HOME/app
 ADD --chown=user ./app.py $HOME/app/app.py
 # Run the application
-CMD ["uv", "run", "app.py"]

+FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
+# RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    build-essential \
+    git \
+    python3 \
+    python3-dev && \
+    rm -rf /var/lib/apt/lists/*
 # uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 RUN useradd -m -u 1000 user
 # Setup environment
 WORKDIR /code
 ADD ./requirements.txt /code/requirements.txt
 RUN uv pip install --no-cache --system -r /code/requirements.txt
 # Server configurations
 EXPOSE 7860
 USER user
+# Environment variables
 ENV HOME=/home/user \
 PATH=/home/user/.local/bin:$PATH
+# Setup application directory
 WORKDIR $HOME/app
+ADD --chown=user ./prompts.py $HOME/app/prompts.py
 ADD --chown=user ./app.py $HOME/app/app.py
 # Run the application
+CMD ["uv", "run", "--no-cache", "app.py"]

app.py CHANGED Viewed

@@ -1,10 +1,72 @@
-import gradio as gr
 import random
 # Dummy model functions for demonstration
 def recommend_sadaimrec(query: str):
-    return f"SADAIMREC: response to '{query}'"
 def recommend_chatgpt(query: str):

+import os
 import random
+import json
+import gradio as gr
+from pydantic import BaseModel
+from vllm import LLM, SamplingParams
+from vllm.sampling_params import GuidedDecodingParams
+from prompts import qp_vllm_user_template
+VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")
+VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
+VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
+HF_TOKEN = os.getenv("HF_TOKEN")
+# --------------------------------  Data Models  -------------------------------
+class StructuredQueryRewriteResponse(BaseModel):
+    general: str | None
+    subjective: str | None
+    purpose: str | None
+    technical: str | None
+    curiosity: str | None
+class QueryRewrite(BaseModel):
+    rewrites: list[str] | None = None
+    structured: StructuredQueryRewriteResponse | None = None
+# --------------------------------  VLLM  --------------------------------------
+local_llm = LLM(
+    model=VLLM_MODEL_NAME,
+    max_model_len=VLLM_MAX_SEQ_LEN,
+    gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
+    hf_token=HF_TOKEN,
+    enforce_eager=True,
+)
+json_schema = StructuredQueryRewriteResponse.model_json_schema()
+guided_decoding_params_json = GuidedDecodingParams(json=json_schema)
+sampling_params_json = SamplingParams(
+    guided_decoding=guided_decoding_params_json,
+    temperature=0.7,
+    top_p=0.8,
+    repetition_penalty=1.05,
+    max_tokens=1024,
+)
+vllm_system_prompt = (
+    "You are a search query optimization assistant built into"
+    " music genre search engine, helping users discover novel music genres."
+)
 # Dummy model functions for demonstration
 def recommend_sadaimrec(query: str):
+    prompt = qp_vllm_user_template.format(query=query)
+    messages = [
+        {"role": "system", "content": vllm_system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+    outputs = local_llm.chat(
+        messages=messages,
+        sampling_params=sampling_params_json,
+    )
+    rewrite_json = json.loads(outputs[0].outputs[0].text)
+    rewrite = QueryRewrite(
+        rewrites=[x for x in list(rewrite_json.values()) if x is not None],
+        structured=rewrite_json,
+    )
+    return f"SADAIMREC: response to '{rewrite.model_dump_json(indent=4)}'"
 def recommend_chatgpt(query: str):

prompts.py ADDED Viewed

	@@ -0,0 +1,232 @@

+qp_vllm_user_template = """# Purpose and Context
+Given a user-generated Search Query describing music they wish to explore, you must create a set of short, diverse, search-optimized rewrites that can be issued ALONGSIDE the original query to maximize recall while preserving precision.
+# Instructions
+1. Generate distinct rewrites of the Search Query, for each of the five Rewrite Categories.
+2. Respond in JSON, adhering strictly to the Reference Output Format.
+# Music Genre Descriptor Keyword Taxonomy
+- **Subjective Characteristics**:
+  - Emotional & perceptual qualities (uplifting, melancholic, dreamy), thematic resonance
+  - Describe the listener's inner feeling
+- **Purpose-Based Characteristics**:
+  - Intended context / scenario (workout, study, dinner party)
+  - Describes listening setting, context, suitable activities
+- **Technical Characteristics**:
+  - Musical & production attributes (instrumentation, timbre, tempo, lo-fi)
+  - Describes how the sound is made
+# Rewrite Categories Specifications
+- **General Rewrite**:
+  - Core/Baseline concise, clean, descriptor-based rewrite of the original query that combines all crucial descriptors available.
+  - Must follow exact descriptor wording of the original
+- **Subjective Rewrite**:
+  - Concise, clean, descriptor-based rewrite, focused solely on **Subjective Characteristics** descriptors from the original query
+  - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
+- **Purpose Rewrite**:
+  - Concise, clean, descriptor-based rewrite, focused solely on **Purpose-Based Characteristics** descriptors from the original query
+  - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
+- **Technical Rewrite**:
+  - Concise, clean, descriptor-based rewrite, focused solely on **Technical Characteristics** descriptors from the original query
+  - Must adhere to original wording of the descriptors while incorporating new diverse perspectives to maximize recall and coverage
+- **Curiosity-driven Rewrite**:
+  - Concise, clean, descriptor-based exploratory rewrite that creatively expands, reinterprets, or provides a curiosity-driven alternative perspective on the original query
+  - Must be grounded in original query, but be exploratory in nature, introducing novel semantic information
+# Rewrite Generation Procedure
+1. **Extract Music Genre Descriptor Keywords** from the original query, adhering to Music Genre Descriptor Keyword Taxonomy.
+2. Formulate a Collection of Rewrites:
+    - If the Search Query does not clearly hint at a particular descriptor keyword category, OMIT the rewrite focused on that category (Subjective, Purpose-based, Technical) by setting its value to null.
+    - ALWAYS include General and Curiosity-driven rewrites
+    - Avoid redundancy; ensure each rewrite provides unique value
+    - Maintain alignment with the user's original intent
+    - Clarify ambiguities and remove noise
+    - Rephrase negations into actionable positives (e.g., "not loud" → "quiet, gentle").
+    - Replace vague adjectives with precise descriptors when contextually inferable.
+    - Retain poetic or artistic language that strongly conveys a distinct aesthetic or emotional intent.
+    - Keep rewrites short, clear, expressive, and optimized for search, no more than 10-15 words.
+    - Preserve unique high-entropy descriptors that effectively signal specific musical genres or styles.
+# Reference Output Format
+```json
+{{
+    "general": "Concise, clean, descriptor-based rewrite of the original query that combines all crucial descriptors available.",
+    "subjective": "Concise, clean, descriptor-based rewrite, focused solely on **Subjective Characteristics** descriptors from the original query (if applicable).",
+    "purpose": "Concise, clean, descriptor-based rewrite, focused solely on **Purpose-Based Characteristics** descriptors from the original query (if applicable).",
+    "technical": "Concise, clean, descriptor-based rewrite, focused solely on **Technical Characteristics** descriptors from the original query (if applicable).",
+    "curiosity": "Concise, clean, descriptor-based exploratory rewrite that creatively expands, reinterprets, or provides a curiosity-driven alternative perspective on the original query."
+}}
+```
+# Examples
+## Example #1
+**Search Query**: "I'm trying to find music that's like, energetic but chill at the same time? Maybe something electronic or synth-y, not too loud, but still good to listen to when studying or just relaxing? Nothing aggressive."
+**Response**:
+```json
+{{
+    "general": "Chill-energetic electronic synth tracks, gentle volume, study-relax friendly",
+    "subjective": "Relaxed yet lively synth-driven melodies and beats",
+    "purpose": "Background electro music ideal for studying and unwinding",
+    "technical": "Mid-tempo synth textures, smooth pads, restrained percussion",
+    "curiosity": "Dreamlike electronica that subtly energizes calm moments"
+}}
+```
+## Example #2
+**Search Query**: "What's that style where people rap but the beats are really slow and kind of dreamy, lo-fi sounding, very chilled and not flashy?"
+**Response**:
+```json
+{{
+    "general": "Slow dreamy lo-fi rap with chilled, subdued beats",
+    "subjective": "Chilled introspective verses with gentle vocal delivery",
+    "purpose": null,
+    "technical": "Lo-fi downtempo percussion, sparse keys, soft basslines",
+    "curiosity": "Ambient spoken-word rhythms floating in misty soundscapes"
+}}
+```
+## Example #3
+**Search Query**: "Can you suggest something that's just... really dark, intense, heavy? Something like heavy metal but slower, with deep vocals, doom-like, you know, just something grim."
+**Response**:
+```json
+{{
+    "general": "Slow heavy metal, deep vocals, dark intense doom mood",
+    "subjective": "Bleak and crushing riffs delivering oppressive intensity",
+    "purpose": null,
+    "technical": "Downtuned guitars, slow tempos, sustained distortion, guttural vocals",
+    "curiosity": "Dragged-out metallic dirges evoking existential dread"
+}}
+```
+## Example #4
+**Search Query**: "Relaxing piano music"
+**Response**:
+```json
+{{
+    "general": "Relaxing calming solo piano pieces with gentle melodies",
+    "subjective": null,
+    "purpose": "Quiet background piano for reflection or meditation sessions",
+    "technical": "Sparse, slow-tempo piano arrangements with soft dynamics",
+    "curiosity": "Minimalist keys exploring silence, resonance, and stillness"
+}}
+```
+## Example #5
+**Search Query**: "I'm throwing a dinner party—need some elegant background music. Nothing loud or distracting, just classy."
+**Response**:
+```json
+{{
+    "general": "Elegant classy background music, subtle volume for dinner",
+    "subjective": null,
+    "purpose": "Classy ambient soundtrack optimized for sophisticated gatherings",
+    "technical": "Soft acoustic instrumentation, low dynamics, tasteful arrangements",
+    "curiosity": "Delicate soundscapes adding quiet charm to refined evenings"
+}}
+```
+## Example #6
+**Search Query**: "Music that's just dreamy and sad, something melancholy."
+**Response**:
+```json
+{{
+    "general": "Dreamy melancholic soundscapes evoking gentle sadness",
+    "subjective": "Soft, sorrowful textures fostering gentle introspection",
+    "purpose": null,
+    "technical": null,
+    "curiosity": "Ethereal tones merging wistful melodies with subtle haze"
+}}
+```
+## Example #7
+**Search Query**: "Something calming, very minimalist, just quiet sounds."
+**Response**:
+```json
+{{
+    "general": "Calming minimalist quiet music built from delicate tones",
+    "subjective": null,
+    "purpose": null,
+    "technical": "Sparse instrumentation, extended rests, very soft volume dynamics",
+    "curiosity": "Experimental near-silence punctuated by graceful sonic wisps"
+}}
+```
+## Example #8
+**Search Query**: "Looking for loud, energetic dance tracks. Something that just makes you move."
+**Response**:
+```json
+{{
+    "general": "Loud high-energy dance tracks engineered to ignite movement",
+    "subjective": null,
+    "purpose": "Party-ready grooves perfect for vigorous dancing and cardio",
+    "technical": "Fast kick drums, driving basslines, dynamic drops, bright leads",
+    "curiosity": "Electrifying rhythms unleashing explosive, unexpected groove shifts"
+}}
+```
+## Example #9
+**Search Query**: "Simple guitar music, maybe acoustic, easygoing."
+**Response**:
+```json
+{{
+    "general": "Easygoing simple acoustic guitar pieces with pleasant melodies",
+    "subjective": null,
+    "purpose": null,
+    "technical": "Light strumming patterns, clean chords, relaxed tempo pace",
+    "curiosity": "Laid-back six-string sessions exploring warm harmonic simplicity"
+}}
+```
+## Example #10
+**Search Query**: "Epic cinematic tracks, dramatic and powerful."
+**Response**:
+```json
+{{
+    "general": "Epic cinematic tracks with dramatic, powerful orchestral intensity",
+    "subjective": "Grand emotional swells evoking awe and tension",
+    "purpose": null,
+    "technical": "Layered orchestration, booming percussion, soaring brass crescendos",
+    "curiosity": "Expansive score fragments exploring colossal orchestral textures"
+}}
+```
+# Search Query
+{query}
+"""

pyproject.toml CHANGED Viewed

@@ -6,4 +6,7 @@ readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "gradio>=5.29.0",
 ]

 requires-python = ">=3.12"
 dependencies = [
     "gradio>=5.29.0",
+    "openai>=1.77.0",
+    "pydantic>=2.11.4",
+    "vllm>=0.8.5.post1",
 ]

requirements.txt CHANGED Viewed

@@ -2,13 +2,34 @@
 #    uv pip compile pyproject.toml -o requirements.txt
 aiofiles==24.1.0
     # via gradio
 annotated-types==0.7.0
     # via pydantic
 anyio==4.9.0
     # via
     #   gradio
     #   httpx
     #   starlette
 certifi==2025.4.26
     # via
     #   httpcore
@@ -18,46 +39,144 @@ charset-normalizer==3.4.2
     # via requests
 click==8.1.8
     # via
     #   typer
     #   uvicorn
 fastapi==0.115.12
-    # via gradio
 ffmpy==0.5.0
     # via gradio
 filelock==3.18.0
-    # via huggingface-hub
 fsspec==2025.3.2
     # via
     #   gradio-client
     #   huggingface-hub
 gradio==5.29.0
     # via sdmrec-demo (pyproject.toml)
 gradio-client==1.10.0
     # via gradio
 groovy==0.1.2
     # via gradio
 h11==0.16.0
     # via
     #   httpcore
     #   uvicorn
 httpcore==1.0.9
     # via httpx
 httpx==0.28.1
     # via
     #   gradio
     #   gradio-client
     #   safehttpx
 huggingface-hub==0.30.2
     # via
     #   gradio
     #   gradio-client
 idna==3.10
     # via
     #   anyio
     #   httpx
     #   requests
 jinja2==3.1.6
-    # via gradio
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.2
@@ -66,25 +185,174 @@ markupsafe==3.0.2
     #   jinja2
 mdurl==0.1.2
     # via markdown-it-py
 numpy==2.2.5
     # via
     #   gradio
     #   pandas
 orjson==3.10.18
     # via gradio
 packaging==25.0
     # via
     #   gradio
     #   gradio-client
     #   huggingface-hub
 pandas==2.2.3
     # via gradio
 pillow==11.2.1
-    # via gradio
 pydantic==2.11.4
     # via
     #   fastapi
     #   gradio
 pydantic-core==2.33.2
     # via pydantic
 pydub==0.25.1
@@ -93,40 +361,141 @@ pygments==2.19.1
     # via rich
 python-dateutil==2.9.0.post0
     # via pandas
 python-multipart==0.0.20
-    # via gradio
 pytz==2025.2
     # via pandas
 pyyaml==6.0.2
     # via
     #   gradio
     #   huggingface-hub
 requests==2.32.3
-    # via huggingface-hub
 rich==14.0.0
-    # via typer
 ruff==0.11.8
     # via gradio
 safehttpx==0.1.6
     # via gradio
 semantic-version==2.10.0
     # via gradio
 shellingham==1.5.4
     # via typer
 six==1.17.0
-    # via python-dateutil
 sniffio==1.3.1
-    # via anyio
 starlette==0.46.2
     # via
     #   fastapi
     #   gradio
 tomlkit==0.13.2
     # via gradio
 tqdm==4.67.1
-    # via huggingface-hub
 typer==0.15.3
-    # via gradio
 typing-extensions==4.13.2
     # via
     #   anyio
@@ -134,10 +503,18 @@ typing-extensions==4.13.2
     #   gradio
     #   gradio-client
     #   huggingface-hub
     #   pydantic
     #   pydantic-core
     #   typer
     #   typing-inspection
 typing-inspection==0.4.0
     # via pydantic
 tzdata==2025.2
@@ -145,6 +522,29 @@ tzdata==2025.2
 urllib3==2.4.0
     # via requests
 uvicorn==0.34.2
-    # via gradio
 websockets==15.0.1
-    # via gradio-client

 #    uv pip compile pyproject.toml -o requirements.txt
 aiofiles==24.1.0
     # via gradio
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.11.18
+    # via vllm
+aiosignal==1.3.2
+    # via aiohttp
+airportsdata==20250224
+    # via outlines
 annotated-types==0.7.0
     # via pydantic
 anyio==4.9.0
     # via
     #   gradio
     #   httpx
+    #   openai
     #   starlette
+    #   watchfiles
+astor==0.8.1
+    # via depyf
+attrs==25.3.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+blake3==1.0.4
+    # via vllm
+cachetools==5.5.2
+    # via vllm
 certifi==2025.4.26
     # via
     #   httpcore
     # via requests
 click==8.1.8
     # via
+    #   ray
+    #   rich-toolkit
     #   typer
     #   uvicorn
+cloudpickle==3.1.1
+    # via
+    #   outlines
+    #   vllm
+compressed-tensors==0.9.3
+    # via vllm
+cupy-cuda12x==13.4.1
+    # via ray
+deprecated==1.2.18
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
+depyf==0.18.0
+    # via vllm
+dill==0.4.0
+    # via depyf
+diskcache==5.6.3
+    # via outlines
+distro==1.9.0
+    # via openai
+dnspython==2.7.0
+    # via email-validator
+einops==0.8.1
+    # via vllm
+email-validator==2.2.0
+    # via fastapi
 fastapi==0.115.12
+    # via
+    #   gradio
+    #   vllm
+fastapi-cli==0.0.7
+    # via fastapi
+fastrlock==0.8.3
+    # via cupy-cuda12x
 ffmpy==0.5.0
     # via gradio
 filelock==3.18.0
+    # via
+    #   huggingface-hub
+    #   ray
+    #   torch
+    #   transformers
+    #   vllm
+frozenlist==1.6.0
+    # via
+    #   aiohttp
+    #   aiosignal
 fsspec==2025.3.2
     # via
     #   gradio-client
     #   huggingface-hub
+    #   torch
+gguf==0.16.2
+    # via vllm
+googleapis-common-protos==1.70.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
 gradio==5.29.0
     # via sdmrec-demo (pyproject.toml)
 gradio-client==1.10.0
     # via gradio
 groovy==0.1.2
     # via gradio
+grpcio==1.71.0
+    # via opentelemetry-exporter-otlp-proto-grpc
 h11==0.16.0
     # via
     #   httpcore
     #   uvicorn
+hf-xet==1.1.0
+    # via huggingface-hub
 httpcore==1.0.9
     # via httpx
+httptools==0.6.4
+    # via uvicorn
 httpx==0.28.1
     # via
+    #   fastapi
     #   gradio
     #   gradio-client
+    #   openai
     #   safehttpx
 huggingface-hub==0.30.2
     # via
     #   gradio
     #   gradio-client
+    #   tokenizers
+    #   transformers
+    #   vllm
 idna==3.10
     # via
     #   anyio
+    #   email-validator
     #   httpx
     #   requests
+    #   yarl
+importlib-metadata==8.0.0
+    # via
+    #   opentelemetry-api
+    #   vllm
+interegular==0.3.3
+    # via
+    #   lm-format-enforcer
+    #   outlines
+    #   outlines-core
 jinja2==3.1.6
+    # via
+    #   fastapi
+    #   gradio
+    #   outlines
+    #   torch
+jiter==0.9.0
+    # via openai
+jsonschema==4.23.0
+    # via
+    #   mistral-common
+    #   outlines
+    #   outlines-core
+    #   ray
+jsonschema-specifications==2025.4.1
+    # via jsonschema
+lark==1.2.2
+    # via
+    #   outlines
+    #   vllm
+llguidance==0.7.19
+    # via vllm
+llvmlite==0.44.0
+    # via numba
+lm-format-enforcer==0.10.11
+    # via vllm
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.2
     #   jinja2
 mdurl==0.1.2
     # via markdown-it-py
+mistral-common==1.5.4
+    # via vllm
+mpmath==1.3.0
+    # via sympy
+msgpack==1.1.0
+    # via ray
+msgspec==0.19.0
+    # via vllm
+multidict==6.4.3
+    # via
+    #   aiohttp
+    #   yarl
+nest-asyncio==1.6.0
+    # via outlines
+networkx==3.4.2
+    # via torch
+ninja==1.11.1.4
+    # via
+    #   vllm
+    #   xgrammar
+numba==0.61.2
+    # via vllm
 numpy==2.2.5
     # via
+    #   cupy-cuda12x
+    #   gguf
     #   gradio
+    #   mistral-common
+    #   numba
+    #   opencv-python-headless
+    #   outlines
     #   pandas
+    #   scipy
+    #   torchvision
+    #   transformers
+    #   vllm
+    #   xformers
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
+openai==1.77.0
+    # via
+    #   sdmrec-demo (pyproject.toml)
+    #   vllm
+opencv-python-headless==4.11.0.86
+    # via
+    #   mistral-common
+    #   vllm
+opentelemetry-api==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+    #   vllm
+opentelemetry-exporter-otlp==1.26.0
+    # via vllm
+opentelemetry-exporter-otlp-proto-common==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-proto==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   vllm
+opentelemetry-semantic-conventions==0.47b0
+    # via opentelemetry-sdk
+opentelemetry-semantic-conventions-ai==0.4.5
+    # via vllm
 orjson==3.10.18
     # via gradio
+outlines==0.1.11
+    # via vllm
+outlines-core==0.1.26
+    # via outlines
 packaging==25.0
     # via
     #   gradio
     #   gradio-client
     #   huggingface-hub
+    #   lm-format-enforcer
+    #   ray
+    #   transformers
 pandas==2.2.3
     # via gradio
+partial-json-parser==0.2.1.1.post5
+    # via vllm
 pillow==11.2.1
+    # via
+    #   gradio
+    #   mistral-common
+    #   torchvision
+    #   vllm
+prometheus-client==0.21.1
+    # via
+    #   prometheus-fastapi-instrumentator
+    #   vllm
+prometheus-fastapi-instrumentator==7.1.0
+    # via vllm
+propcache==0.3.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==4.25.7
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+    #   ray
+    #   vllm
+psutil==7.0.0
+    # via vllm
+py-cpuinfo==9.0.0
+    # via vllm
+pycountry==24.6.1
+    # via outlines
 pydantic==2.11.4
     # via
+    #   sdmrec-demo (pyproject.toml)
+    #   compressed-tensors
     #   fastapi
     #   gradio
+    #   lm-format-enforcer
+    #   mistral-common
+    #   openai
+    #   outlines
+    #   vllm
+    #   xgrammar
 pydantic-core==2.33.2
     # via pydantic
 pydub==0.25.1
     # via rich
 python-dateutil==2.9.0.post0
     # via pandas
+python-dotenv==1.1.0
+    # via uvicorn
+python-json-logger==3.3.0
+    # via vllm
 python-multipart==0.0.20
+    # via
+    #   fastapi
+    #   gradio
 pytz==2025.2
     # via pandas
 pyyaml==6.0.2
     # via
+    #   gguf
     #   gradio
     #   huggingface-hub
+    #   lm-format-enforcer
+    #   ray
+    #   transformers
+    #   uvicorn
+    #   vllm
+pyzmq==26.4.0
+    # via vllm
+ray==2.45.0
+    # via vllm
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+    #   outlines
+regex==2024.11.6
+    # via
+    #   tiktoken
+    #   transformers
 requests==2.32.3
+    # via
+    #   huggingface-hub
+    #   mistral-common
+    #   opentelemetry-exporter-otlp-proto-http
+    #   outlines
+    #   ray
+    #   tiktoken
+    #   transformers
+    #   vllm
 rich==14.0.0
+    # via
+    #   rich-toolkit
+    #   typer
+rich-toolkit==0.14.5
+    # via fastapi-cli
+rpds-py==0.24.0
+    # via
+    #   jsonschema
+    #   referencing
 ruff==0.11.8
     # via gradio
 safehttpx==0.1.6
     # via gradio
+safetensors==0.5.3
+    # via transformers
+scipy==1.15.2
+    # via vllm
 semantic-version==2.10.0
     # via gradio
+sentencepiece==0.2.0
+    # via
+    #   gguf
+    #   mistral-common
+    #   vllm
+    #   xgrammar
+setuptools==80.3.1
+    # via
+    #   torch
+    #   vllm
 shellingham==1.5.4
     # via typer
 six==1.17.0
+    # via
+    #   python-dateutil
+    #   vllm
 sniffio==1.3.1
+    # via
+    #   anyio
+    #   openai
 starlette==0.46.2
     # via
     #   fastapi
     #   gradio
+    #   prometheus-fastapi-instrumentator
+sympy==1.13.1
+    # via torch
+tiktoken==0.9.0
+    # via
+    #   mistral-common
+    #   vllm
+    #   xgrammar
+tokenizers==0.21.1
+    # via
+    #   transformers
+    #   vllm
 tomlkit==0.13.2
     # via gradio
+torch==2.6.0
+    # via
+    #   compressed-tensors
+    #   outlines
+    #   torchaudio
+    #   torchvision
+    #   vllm
+    #   xformers
+    #   xgrammar
+torchaudio==2.6.0
+    # via vllm
+torchvision==0.21.0
+    # via vllm
 tqdm==4.67.1
+    # via
+    #   gguf
+    #   huggingface-hub
+    #   openai
+    #   outlines
+    #   transformers
+    #   vllm
+transformers==4.51.3
+    # via
+    #   compressed-tensors
+    #   vllm
+    #   xgrammar
+triton==3.2.0
+    # via
+    #   torch
+    #   xgrammar
 typer==0.15.3
+    # via
+    #   fastapi-cli
+    #   gradio
 typing-extensions==4.13.2
     # via
     #   anyio
     #   gradio
     #   gradio-client
     #   huggingface-hub
+    #   mistral-common
+    #   openai
+    #   opentelemetry-sdk
+    #   outlines
     #   pydantic
     #   pydantic-core
+    #   referencing
+    #   rich-toolkit
+    #   torch
     #   typer
     #   typing-inspection
+    #   vllm
 typing-inspection==0.4.0
     # via pydantic
 tzdata==2025.2
 urllib3==2.4.0
     # via requests
 uvicorn==0.34.2
+    # via
+    #   fastapi
+    #   fastapi-cli
+    #   gradio
+uvloop==0.21.0
+    # via uvicorn
+vllm==0.8.5.post1
+    # via sdmrec-demo (pyproject.toml)
+watchfiles==1.0.5
+    # via
+    #   uvicorn
+    #   vllm
 websockets==15.0.1
+    # via
+    #   gradio-client
+    #   uvicorn
+wrapt==1.17.2
+    # via deprecated
+xformers==0.0.29.post2
+    # via vllm
+xgrammar==0.1.18
+    # via vllm
+yarl==1.20.0
+    # via aiohttp
+zipp==3.21.0
+    # via importlib-metadata

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff