benkada committed on
Commit 6991b14 · verified · 1 Parent(s): f7e9534

Update main.py

Files changed (1)
  1. main.py +87 -124
main.py CHANGED
@@ -1,126 +1,89 @@
- import os
- from fastapi import FastAPI, UploadFile, File, HTTPException
- from fastapi.middleware.cors import CORSMiddleware
  from fastapi.responses import JSONResponse
- from pydantic import BaseModel
- from typing import Optional
  from PIL import Image
- import pytesseract
- from transformers import pipeline
- from langchain.chains import LLMChain
- from langchain.prompts import PromptTemplate
- from langchain_community.llms import HuggingFaceHub
-
- # Ensure HF cache directory is set before any HF import uses it
- os.environ.setdefault("HF_HOME", os.getenv("HF_HOME", "/app/cache"))
-
- # FastAPI application
- app = FastAPI(
-     title="AI-Powered Web Application API",
-     description="API for document summarization, image captioning, and question answering",
-     version="1.0.0"
- )
-
- # CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # ----------------
- # Schemas
- # ----------------
- class SummarizeRequest(BaseModel):
-     text: str
-     max_length: Optional[int] = 150
-     min_length: Optional[int] = 40
-
- class QARequest(BaseModel):
-     question: str
-     context: Optional[str] = None
-
- # ----------------
- # Model loaders (lazy)
- # ----------------
- _cache_dir = os.getenv("HF_HOME", "/app/cache")
- _summarizer = None
- _captioner = None
- _qa_chain = None
-
-
- def get_summarizer():
-     global _summarizer
-     if _summarizer is None:
-         _summarizer = pipeline(
-             "summarization",
-             model="facebook/bart-large-cnn",
-             cache_dir=_cache_dir
-         )
-     return _summarizer
-
-
- def get_image_captioner():
-     global _captioner
-     if _captioner is None:
-         _captioner = pipeline(
-             "image-to-text",
-             model="nlpconnect/vit-gpt2-image-captioning",
-             cache_dir=_cache_dir
-         )
-     return _captioner
-
-
- def get_qa_chain():
-     global _qa_chain
-     if _qa_chain is None:
-         llm = HuggingFaceHub(
-             repo_id="google/flan-t5-large",
-             model_kwargs={"cache_dir": _cache_dir},
-             huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN", None)
-         )
-         prompt = PromptTemplate(
-             input_variables=["context", "question"],
-             template="""
- Use the following context to answer the question:
-
- {context}
-
- Question: {question}
- Answer:"""
-         )
-         _qa_chain = LLMChain(llm=llm, prompt=prompt)
-     return _qa_chain
-
- # ----------------
- # Routes
- # ----------------
- @app.post("/summarize")
- def summarize(req: SummarizeRequest):
-     summarizer = get_summarizer()
-     result = summarizer(
-         req.text,
-         max_length=req.max_length,
-         min_length=req.min_length,
-         clean_up_tokenization_spaces=True
-     )
-     return JSONResponse(content={"summary": result[0]["summary_text"]})
-
- @app.post("/caption")
- async def caption_image(file: UploadFile = File(...)):
-     try:
-         img = Image.open(file.file).convert("RGB")
-         captioner = get_image_captioner()
-         result = captioner(img)
-         return JSONResponse(content={"caption": result[0]["generated_text"]})
-     except Exception as e:
-         raise HTTPException(status_code=400, detail=str(e))
-
- @app.post("/qa")
- def question_answer(req: QARequest):
-     chain = get_qa_chain()
-     context = req.context or ""
-     answer = chain.run({"context": context, "question": req.question})
-     return JSONResponse(content={"answer": answer})
 
+ from fastapi import FastAPI, UploadFile, File, Form
  from fastapi.responses import JSONResponse
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ import uvicorn
+ import tempfile
+ import os
  from PIL import Image
+ import torch
+
+ app = FastAPI()
+
+ # Load the tokenizers up front; the full models are loaded lazily on first use
+ tokenizers = {
+     "qwen": AutoTokenizer.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True),
+     "deepseek": AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V2-Chat", trust_remote_code=True),
+     "llama": AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf", trust_remote_code=True),
+ }
+
+ models = {}
+
+ def load_model(name):
+     if name not in models:
+         if name == "qwen":
+             models[name] = AutoModelForCausalLM.from_pretrained(
+                 "Qwen/Qwen2.5-VL-7B-Instruct",
+                 device_map="auto",
+                 trust_remote_code=True,
+                 torch_dtype=torch.float16
+             )
+         elif name == "deepseek":
+             models[name] = AutoModelForCausalLM.from_pretrained(
+                 "deepseek-ai/DeepSeek-V2-Chat",
+                 device_map="auto",
+                 trust_remote_code=True,
+                 torch_dtype=torch.float16
+             )
+         elif name == "llama":
+             models[name] = AutoModelForCausalLM.from_pretrained(
+                 "meta-llama/Llama-2-70b-chat-hf",
+                 device_map="auto",
+                 trust_remote_code=True,
+                 torch_dtype=torch.float16
+             )
+     return models[name]
+
+ @app.post("/api/summarize")
+ async def summarize(file: UploadFile = File(...)):
+     ext = os.path.splitext(file.filename)[1].lower()
+     temp_path = os.path.join(tempfile.gettempdir(), file.filename)
+     with open(temp_path, "wb") as f:
+         f.write(await file.read())
+
+     # For now: run plain text summarization over the uploaded file
+     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+     with open(temp_path, 'r', errors='ignore') as f:
+         text = f.read()
+
+     if len(text) > 1024:
+         text = text[:1024]
+
+     summary = summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
+     return JSONResponse({"result": summary})
+
+ @app.post("/api/caption")
+ async def caption(file: UploadFile = File(...)):
+     image = Image.open(file.file).convert("RGB")  # Image.open needs a file object, not raw bytes
+     # For now: use a lightweight vision model, since loading Qwen2.5-VL takes a long time
+     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+     caption = captioner(image)[0]['generated_text']
+     return JSONResponse({"result": caption})
+
+ @app.post("/api/qa")
+ async def question_answer(file: UploadFile = File(...), question: str = Form(...)):
+     temp_path = os.path.join(tempfile.gettempdir(), file.filename)
+     with open(temp_path, "wb") as f:
+         f.write(await file.read())
+
+     # For now: answer with the DeepSeek model (the uploaded file is saved but not yet fed into the prompt)
+     tokenizer = tokenizers["deepseek"]
+     model = load_model("deepseek")
+
+     inputs = tokenizer(question, return_tensors="pt").to(model.device)
+     outputs = model.generate(**inputs, max_new_tokens=100)
+     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     return JSONResponse({"result": answer})
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
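
For reference, a minimal client sketch for the new endpoints. This is an illustration, not part of the commit: it assumes the app is running locally on port 7860 as in the __main__ block above, uses the third-party requests library, and the file names and question text are placeholders.

import requests  # assumed client-side dependency, not part of this repo

BASE = "http://localhost:7860"  # host/port taken from uvicorn.run() above

# /api/summarize and /api/caption each take a single multipart file upload
with open("report.txt", "rb") as f:  # placeholder document
    print(requests.post(f"{BASE}/api/summarize", files={"file": f}).json()["result"])

with open("photo.jpg", "rb") as f:  # placeholder image
    print(requests.post(f"{BASE}/api/caption", files={"file": f}).json()["result"])

# /api/qa additionally takes a "question" form field
with open("report.txt", "rb") as f:
    resp = requests.post(f"{BASE}/api/qa", files={"file": f},
                         data={"question": "What is the main finding?"})
print(resp.json()["result"])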