first commit
- .gitignore +63 -0
- Dockerfile +16 -0
- README.md +41 -7
- app.py +62 -0
- requirements.txt +7 -0
- src/encoder.py +48 -0
- src/models.py +20 -0
- src/utils.py +64 -0
.gitignore
ADDED
@@ -0,0 +1,63 @@
# Python-related files
__pycache__/
*.py[cod]
*.swp
.DS_Store
*.egg-info/

# Virtual environment
venv/
env/
*.venv/

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Logs
logs/
*.log

# Hugging Face Transformers cache
~/.cache/huggingface/

# Docker-related files
*.dockerignore

# Ignore compiled code
*.so
*.o
*.out
*.a

# Ignore OS-specific files
Thumbs.db
ehthumbs.db

# Ignore FastAPI auto-generated files
*.db
instance/
.env
.env.local
.env.*.local

# VS Code settings
.vscode/
.history/

# Ignore dependency files
pip-log.txt
pip-delete-this-directory.txt

# Ignore coverage files
.coverage
htmlcov/
coverage.xml

# Ignore test-related files
.tox/
.pytest_cache/
nosetests.xml
test-reports/

# Ignore Hugging Face Spaces cache
space_runtime/
Dockerfile
ADDED
@@ -0,0 +1,16 @@
FROM python:3.9

WORKDIR /app

# Writable Hugging Face cache inside the container
ENV HF_HOME=/app/hf_cache
# Declare the build arg so ${HF_TOKEN} resolves (docker build --build-arg HF_TOKEN=...)
ARG HF_TOKEN
ENV HF_TOKEN=${HF_TOKEN}
RUN mkdir -p /app/hf_cache && chmod 777 /app/hf_cache

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "4"]
README.md
CHANGED
@@ -1,12 +1,46 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Fashionclip
+emoji: 🌖
+colorFrom: purple
+colorTo: yellow
 sdk: docker
 pinned: false
-license:
-short_description:
+license: cc-by-nc-4.0
+short_description: Convert text to embeddings with FashionCLIP
 ---
 
-
+# Install
+
+### Create SSH key
+
+```
+ls ~/.ssh/id_rsa.pub
+ssh-keygen -t rsa -b 4096 -C "[email protected]"
+cat ~/.ssh/id_rsa.pub
+```
+
+### Add key to HF SSH key settings
+
+### Clone project
+
+```
+git clone [email protected]:spaces/pdiot/fclip.git
+python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+# Usage
+
+### FastAPI
+
+```
+uvicorn app:app --host 0.0.0.0 --port 8080 --reload
+```
+
+### Docker
+
+```
+docker build -t fclip .
+docker run -p 8080:7860 fclip
+```
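Once the server is up (via uvicorn or the Docker mapping above), the endpoints accept any HTTP client. A minimal sketch in Python with `requests`; the base URL and token value here are assumptions that must match your local setup:

```
import requests

API_TOKEN = "change-me"  # assumption: must equal the API_TOKEN the server reads
BASE_URL = "http://localhost:8080"  # matches the uvicorn/Docker examples above

resp = requests.post(
    f"{BASE_URL}/encode_texts",
    json={"texts": ["red floral summer dress"]},
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()
embeddings = resp.json()["embeddings"]
print(len(embeddings), len(embeddings[0]))  # one vector per input text
```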
app.py
ADDED
@@ -0,0 +1,62 @@
from fastapi import FastAPI, HTTPException, status, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from decouple import config

from src.encoder import FashionCLIPEncoder
from src.models import TextRequest, ImageRequest, Response


security = HTTPBearer()
encoder = FashionCLIPEncoder()


API_TOKEN = config("API_TOKEN")


app = FastAPI()


@app.get("/")
async def root():
    return {
        "status": "ok",
        "message": "FashionCLIP API is running",
        "endpoints": {
            "encode_texts": "POST /encode_texts - Get embeddings for text inputs",
            "encode_images": "POST /encode_images - Get embeddings for image inputs",
        },
    }


@app.post("/encode_texts")
async def encode_texts(
    request: TextRequest, credentials: HTTPAuthorizationCredentials = Security(security)
) -> Response:
    if credentials.credentials != API_TOKEN:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication token",
        )

    embeddings = encoder.encode_text(request.texts)
    response = Response(embeddings=embeddings)

    return response


@app.post("/encode_images")
async def encode_images(
    request: ImageRequest,
    credentials: HTTPAuthorizationCredentials = Security(security),
) -> Response:
    if credentials.credentials != API_TOKEN:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication token",
        )

    images = request.download()
    embeddings = encoder.encode_images(images)
    response = Response(embeddings=embeddings)

    return response
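Both endpoints compare the bearer token to `API_TOKEN`, which python-decouple resolves from the environment or a `.env` file next to `app.py`. A sketch of the rejection path with FastAPI's `TestClient` (assumptions: the `httpx` package is installed, and importing `app` instantiates the encoder, so model weights are downloaded on first run):

```
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

# A wrong token is rejected with 401 before the encoder is ever called.
resp = client.post(
    "/encode_texts",
    json={"texts": ["blue denim jacket"]},
    headers={"Authorization": "Bearer not-the-real-token"},
)
assert resp.status_code == 401
print(resp.json()["detail"])  # "Invalid authentication token"
```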
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi
uvicorn
pydantic
torch
transformers
open-clip-torch
python-decouple
src/encoder.py
ADDED
@@ -0,0 +1,48 @@
from typing import List, Dict
from PIL.Image import Image

import torch
from transformers import AutoModel, AutoProcessor


MODEL_NAME = "Marqo/marqo-fashionCLIP"


class FashionCLIPEncoder:
    def __init__(self):
        self.processor = AutoProcessor.from_pretrained(
            MODEL_NAME, trust_remote_code=True
        )
        self.model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(self.device)
        self.model.eval()

    def encode_text(self, texts: List[str]) -> List[List[float]]:
        kwargs = {
            "padding": "max_length",
            "return_tensors": "pt",
            "truncation": True,
        }
        inputs = self.processor(text=texts, **kwargs)

        with torch.no_grad():
            batch = {k: v.to(self.device) for k, v in inputs.items()}
            return self._encode_text(batch)

    def encode_images(self, images: List[Image]) -> List[List[float]]:
        kwargs = {
            "return_tensors": "pt",
        }
        inputs = self.processor(images=images, **kwargs)

        with torch.no_grad():
            batch = {k: v.to(self.device) for k, v in inputs.items()}
            return self._encode_images(batch)

    def _encode_text(self, batch: Dict) -> List[List[float]]:
        return self.model.get_text_features(**batch).detach().cpu().numpy().tolist()

    def _encode_images(self, batch: Dict) -> List[List[float]]:
        return self.model.get_image_features(**batch).detach().cpu().numpy().tolist()
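The encoder also works standalone, outside the API. A minimal sketch; the first call downloads the Marqo/marqo-fashionCLIP weights into the Transformers cache:

```
from src.encoder import FashionCLIPEncoder

encoder = FashionCLIPEncoder()
vectors = encoder.encode_text(["striped cotton shirt", "black leather boots"])
print(len(vectors), len(vectors[0]))  # one embedding per input text
```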
src/models.py
ADDED
@@ -0,0 +1,20 @@
from pydantic import BaseModel
from typing import List
from PIL.Image import Image

from .utils import download_image_as_pil


class TextRequest(BaseModel):
    texts: List[str]


class ImageRequest(BaseModel):
    urls: List[str]

    def download(self) -> List[Image]:
        return [download_image_as_pil(url) for url in self.urls]


class Response(BaseModel):
    embeddings: List[List[float]]
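`ImageRequest.download()` turns a list of URLs into PIL images via the helper in `src/utils.py`. A small illustration; the URL is hypothetical:

```
from src.models import ImageRequest, TextRequest

text_req = TextRequest(texts=["wool overcoat"])  # validated by pydantic
img_req = ImageRequest(urls=["https://example.com/coat.jpg"])  # hypothetical URL
images = img_req.download()  # PIL images; a failed download yields None
```

Note that `download_image_as_pil` returns None on failure and `encode_images` does not filter such entries out, so callers should ensure the URLs are reachable.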
src/utils.py
ADDED
@@ -0,0 +1,64 @@
from typing import Dict, Optional

import requests
import torch
from PIL import Image


REQUESTS_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}


def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    """Fetch an image over HTTP; returns None on a non-200 response or any error."""
    try:
        response = requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        )

        if response.status_code == 200:
            return Image.open(response.raw)

    except Exception:
        pass

    return None


def analyze_model_parameters(model: torch.nn.Module) -> Dict:
    total_params = 0
    param_types = set()
    param_type_counts = {}

    for param in model.parameters():
        total_params += param.numel()
        dtype = param.dtype
        param_types.add(dtype)
        param_type_counts[dtype] = param_type_counts.get(dtype, 0) + param.numel()

    results = {
        "total_params": total_params,
        "param_types": {},
        "device_info": {
            "device": str(next(model.parameters()).device),
            "cuda_available": torch.cuda.is_available(),
        },
    }

    for dtype in param_types:
        count = param_type_counts[dtype]
        percentage = (count / total_params) * 100
        # torch.finfo only covers floating-point dtypes; use iinfo for integers.
        bits = torch.finfo(dtype).bits if dtype.is_floating_point else torch.iinfo(dtype).bits
        memory_bytes = count * bits // 8
        memory_mb = memory_bytes / (1024 * 1024)

        results["param_types"][str(dtype)] = {
            "count": count,
            "percentage": percentage,
            "memory_mb": memory_mb,
        }

    if torch.cuda.is_available():
        results["device_info"].update({
            "cuda_device": torch.cuda.get_device_name(0),
            "cuda_memory_allocated_mb": torch.cuda.memory_allocated(0) / 1024**2,
            "cuda_memory_cached_mb": torch.cuda.memory_reserved(0) / 1024**2,
        })

    return results
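`analyze_model_parameters` is a diagnostic helper not wired into the API; it can be pointed at the encoder's model to report parameter counts, per-dtype memory, and device placement. A sketch:

```
from src.encoder import FashionCLIPEncoder
from src.utils import analyze_model_parameters

report = analyze_model_parameters(FashionCLIPEncoder().model)
print(report["total_params"])   # total parameter count
print(report["param_types"])    # per-dtype counts, percentages, memory in MB
print(report["device_info"])    # device placement plus CUDA stats when available
```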