import os

os.environ['HF_HOME'] = '/tmp/.cache/huggingface'  # Use /tmp in Spaces
os.makedirs(os.environ['HF_HOME'], exist_ok=True)  # Ensure directory exists
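# NOTE: HF_HOME must be set before importing huggingface_hub / transformers,
# since those libraries resolve the cache location when they are first imported.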

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from qwen_classifier.predict import predict_single  # single-text prediction helper
from qwen_classifier.evaluate import evaluate_batch  # batch evaluation helper
import torch
from huggingface_hub import login
from qwen_classifier.model import QwenClassifier
from qwen_classifier.config import HF_REPO, SPACE_URL
from pydantic import BaseModel, Field

app = FastAPI(title="Qwen Classifier")

# Landing page listing the available endpoints
@app.get("/", response_class=HTMLResponse)
def home():
    return f"""
    <html>
        <head>
            <title>Qwen Classifier</title>
        </head>
        <body>
            <h1>Qwen Classifier API</h1>
            <p>Available endpoints:</p>
            <ul>
                <li><strong>POST /predict</strong> - Classify text</li>
                <li><strong>POST /evaluate</strong> - Evaluate batch predictions from a zip file of texts</li>
                <li><strong>GET /health</strong> - Check API status</li>
            </ul>
            <p>Try it: <code>curl -X POST {SPACE_URL}/predict -H "Content-Type: application/json" -d '{{"text":"your text"}}'</code></p>
        </body>
    </html>
    """

@app.on_event("startup")
async def load_model():
    # Warm up the GPU if one is available (avoids crashing on CPU-only hardware)
    if torch.cuda.is_available():
        torch.zeros(1).cuda()
    # Read HF_TOKEN from Hugging Face Space secrets
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables")

    # Authenticate
    login(token=hf_token)
    
    # Load model weights (cached under HF_HOME, i.e. /tmp/.cache/huggingface)
    app.state.model = QwenClassifier.from_pretrained(HF_REPO)
    print("Model loaded successfully!")


class PredictionRequest(BaseModel):
    text: str = Field(..., min_length=1)  # ← Enforces that 'text' is a non-empty string

@app.post("/predict")
async def predict(request: PredictionRequest):  # ← Validates input automatically
    return predict_single(request.text, HF_REPO, backend="local")
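
# Example request (illustrative; host and port depend on the deployment):
#   curl -X POST http://localhost:7860/predict \
#        -H "Content-Type: application/json" -d '{"text":"your text"}'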

@app.post("/evaluate")
async def evaluate(request: PredictionRequest):  # ← Validates input automatically
    return evaluate_batch(request.text, HF_REPO, backend="local")

@app.get("/health")
def health_check():
    return {"status": "healthy", "model": "loaded"}