pediot committed on
Commit 88e0bae · 1 Parent(s): 3930c1a

first commit

Files changed (8)
  1. .gitignore +63 -0
  2. Dockerfile +16 -0
  3. README.md +41 -7
  4. app.py +62 -0
  5. requirements.txt +7 -0
  6. src/encoder.py +48 -0
  7. src/models.py +20 -0
  8. src/utils.py +70 -0
.gitignore ADDED
@@ -0,0 +1,63 @@
+ # Python-related files
+ __pycache__/
+ *.py[cod]
+ *.swp
+ .DS_Store
+ *.egg-info/
+
+ # Virtual environment
+ venv/
+ env/
+ *.venv/
+
+ # Jupyter Notebook checkpoints
+ .ipynb_checkpoints/
+
+ # Logs
+ logs/
+ *.log
+
+ # Hugging Face Transformers cache
+ ~/.cache/huggingface/
+
+ # Docker-related files
+ *.dockerignore
+
+ # Compiled code
+ *.so
+ *.o
+ *.out
+ *.a
+
+ # OS-specific files
+ Thumbs.db
+ ehthumbs.db
+
+ # Databases and environment files
+ *.db
+ instance/
+ .env
+ .env.local
+ .env.*.local
+
+ # VS Code settings
+ .vscode/
+ .history/
+
+ # Dependency files
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Coverage files
+ .coverage
+ htmlcov/
+ coverage.xml
+
+ # Test-related files
+ .tox/
+ .pytest_cache/
+ nosetests.xml
+ test-reports/
+
+ # Hugging Face Spaces cache
+ space_runtime/
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM python:3.9
+
+ WORKDIR /app
+
+ ENV HF_HOME=/app/hf_cache
+ ENV HF_TOKEN=${HF_TOKEN}
+ RUN mkdir -p /app/hf_cache && chmod 777 /app/hf_cache
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "4"]
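
A caveat on this Dockerfile: `ENV HF_TOKEN=${HF_TOKEN}` only resolves to a value if a matching `ARG HF_TOKEN` is declared earlier in the file or the variable is supplied at runtime (e.g. `docker run -e HF_TOKEN=...`); as written, the image bakes in an empty string at build time. On Hugging Face Spaces the token is normally injected as a Space secret instead. A minimal, hypothetical startup check for the variables this image is assumed to need (the check is not part of this commit):

```
import os

# Hypothetical sanity check; the variable names come from the Dockerfile
# and app.py in this commit, the check itself is illustrative only.
for var in ("HF_HOME", "HF_TOKEN", "API_TOKEN"):
    if not os.environ.get(var):
        print(f"warning: {var} is not set; model download or API auth may fail")
```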
README.md CHANGED
@@ -1,12 +1,46 @@
  ---
- title: Fclip
- emoji: 🚀
- colorFrom: blue
- colorTo: gray
+ title: Fashionclip
+ emoji: 🌖
+ colorFrom: purple
+ colorTo: yellow
  sdk: docker
  pinned: false
- license: mit
- short_description: Generate text & image embeddings for clothing items
+ license: cc-by-nc-4.0
+ short_description: Convert text to embeddings with FashionCLIP
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Install
+
+ ### Create SSH key
+
+ ```
+ ls ~/.ssh/id_rsa.pub
+ ssh-keygen -t rsa -b 4096 -C "[email protected]"
+ cat ~/.ssh/id_rsa.pub
+ ```
+
+ ### Add key to HF SSH key settings
+
+ ### Clone project
+
+ ```
+ git clone [email protected]:spaces/pdiot/fclip.git
+ python -m venv venv
+ source venv/bin/activate
+ pip install -r requirements.txt
+ ```
+
+ # Usage
+
+ ### FastAPI
+
+ ```
+ uvicorn app:app --host 0.0.0.0 --port 8080 --reload
+ ```
+
+ ### Docker
+
+ ```
+ docker build -t fclip .
+ docker run -p 8080:7860 fclip
+ ```
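
Note that the `docker run` mapping publishes container port 7860 on host port 8080, so clients talk to `localhost:8080` in both the FastAPI and Docker setups. A minimal client sketch against a locally running instance (the URL and token are placeholders; the bearer token must match the server's `API_TOKEN`):

```
import requests

API_URL = "http://localhost:8080"  # host port from the run commands above
API_TOKEN = "change-me"            # placeholder; must equal the server's API_TOKEN

resp = requests.post(
    f"{API_URL}/encode_texts",
    json={"texts": ["red floral summer dress", "black leather boots"]},
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    timeout=30,
)
resp.raise_for_status()
print(len(resp.json()["embeddings"]))  # one embedding per input text
```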
app.py ADDED
@@ -0,0 +1,62 @@
+ from fastapi import FastAPI, HTTPException, status, Security
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+ from decouple import config
+
+ from src.encoder import FashionCLIPEncoder
+ from src.models import TextRequest, ImageRequest, Response
+
+
+ security = HTTPBearer()
+ encoder = FashionCLIPEncoder()
+
+
+ API_TOKEN = config("API_TOKEN")
+
+
+ app = FastAPI()
+
+
+ @app.get("/")
+ async def root():
+     return {
+         "status": "ok",
+         "message": "FashionCLIP API is running",
+         "endpoints": {
+             "encode_texts": "POST /encode_texts - Get embeddings for text inputs",
+             "encode_images": "POST /encode_images - Get embeddings for image inputs",
+         },
+     }
+
+
+ @app.post("/encode_texts")
+ async def encode_texts(
+     request: TextRequest, credentials: HTTPAuthorizationCredentials = Security(security)
+ ) -> Response:
+     if credentials.credentials != API_TOKEN:
+         raise HTTPException(
+             status_code=status.HTTP_401_UNAUTHORIZED,
+             detail="Invalid authentication token",
+         )
+
+     embeddings = encoder.encode_text(request.texts)
+     response = Response(embeddings=embeddings)
+
+     return response
+
+
+ @app.post("/encode_images")
+ async def encode_images(
+     request: ImageRequest,
+     credentials: HTTPAuthorizationCredentials = Security(security),
+ ) -> Response:
+     if credentials.credentials != API_TOKEN:
+         raise HTTPException(
+             status_code=status.HTTP_401_UNAUTHORIZED,
+             detail="Invalid authentication token",
+         )
+
+     images = request.download()
+     embeddings = encoder.encode_images(images)
+     response = Response(embeddings=embeddings)
+
+     return response
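
One hardening note on the handlers above: both repeat the same token check, and comparing secrets with `!=` is not constant-time. If that matters for this deployment, the standard library's `secrets.compare_digest` is a drop-in alternative; a sketch (not part of this commit) that could also be factored into a shared FastAPI dependency:

```
import secrets

def is_authorized(presented: str, expected: str) -> bool:
    # Constant-time comparison avoids leaking token prefixes via timing.
    return secrets.compare_digest(presented, expected)
```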
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ pydantic
+ torch
+ transformers
+ open-clip-torch
+ python-decouple
src/encoder.py ADDED
@@ -0,0 +1,48 @@
+ from typing import List, Dict
+ from PIL.Image import Image
+
+ import torch
+ from transformers import AutoModel, AutoProcessor
+
+
+ MODEL_NAME = "Marqo/marqo-fashionCLIP"
+
+
+ class FashionCLIPEncoder:
+     def __init__(self):
+         self.processor = AutoProcessor.from_pretrained(
+             MODEL_NAME, trust_remote_code=True
+         )
+         self.model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
+
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = self.model.to(self.device)
+         self.model.eval()
+
+     def encode_text(self, texts: List[str]) -> List[List[float]]:
+         kwargs = {
+             "padding": "max_length",
+             "return_tensors": "pt",
+             "truncation": True,
+         }
+         inputs = self.processor(text=texts, **kwargs)
+
+         with torch.no_grad():
+             batch = {k: v.to(self.device) for k, v in inputs.items()}
+             return self._encode_text(batch)
+
+     def encode_images(self, images: List[Image]) -> List[List[float]]:
+         kwargs = {
+             "return_tensors": "pt",
+         }
+         inputs = self.processor(images=images, **kwargs)
+
+         with torch.no_grad():
+             batch = {k: v.to(self.device) for k, v in inputs.items()}
+             return self._encode_images(batch)
+
+     def _encode_text(self, batch: Dict) -> List[List[float]]:
+         return self.model.get_text_features(**batch).detach().cpu().numpy().tolist()
+
+     def _encode_images(self, batch: Dict) -> List[List[float]]:
+         return self.model.get_image_features(**batch).detach().cpu().numpy().tolist()
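
For reference, a minimal local usage sketch of the encoder. The embedding width depends on the checkpoint; for `Marqo/marqo-fashionCLIP` (a ViT-B/16-style model) it is typically 512, but treat that as an assumption rather than a guarantee:

```
from src.encoder import FashionCLIPEncoder

encoder = FashionCLIPEncoder()  # downloads the checkpoint on first use

vectors = encoder.encode_text(["red floral summer dress"])
print(len(vectors), len(vectors[0]))  # 1 embedding of the model's projection size
```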
src/models.py ADDED
@@ -0,0 +1,20 @@
+ from pydantic import BaseModel
+ from typing import List
+ from PIL.Image import Image
+
+ from .utils import download_image_as_pil
+
+
+ class TextRequest(BaseModel):
+     texts: List[str]
+
+
+ class ImageRequest(BaseModel):
+     urls: List[str]
+
+     def download(self) -> List[Image]:
+         return [download_image_as_pil(url) for url in self.urls]
+
+
+ class Response(BaseModel):
+     embeddings: List[List[float]]
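
The wire formats follow directly from these models. A quick sketch of the JSON payloads the endpoints accept (this uses pydantic v2's `model_dump`; on pydantic v1 the equivalent is `.dict()`, and requirements.txt does not pin a major version):

```
from src.models import TextRequest, ImageRequest

text_req = TextRequest(texts=["red dress", "blue jeans"])
image_req = ImageRequest(urls=["https://example.com/dress.jpg"])  # placeholder URL

print(text_req.model_dump())   # {'texts': ['red dress', 'blue jeans']}
print(image_req.model_dump())  # {'urls': ['https://example.com/dress.jpg']}
```

Note that `ImageRequest.download()` returns `None` for any URL that fails to download, and `encode_images` does not currently filter those out before batching.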
src/utils.py ADDED
@@ -0,0 +1,70 @@
+ from typing import Dict, Optional
+
+ import requests
+ import torch
+ from PIL import Image
+
+
+ REQUESTS_HEADERS = {
+     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ }
+
+
+ def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
+     # Returns None on non-200 responses and on any network or decoding
+     # error; callers must filter out failed downloads before batching.
+     try:
+         response = requests.get(
+             url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
+         )
+
+         if response.status_code == 200:
+             return Image.open(response.raw)
+
+     except Exception:
+         return None
+
+
+ def analyze_model_parameters(model: torch.nn.Module) -> Dict:
+     total_params = 0
+     param_types = set()
+     param_type_counts = {}
+
+     for param in model.parameters():
+         total_params += param.numel()
+         dtype = param.dtype
+         param_types.add(dtype)
+         param_type_counts[dtype] = param_type_counts.get(dtype, 0) + param.numel()
+
+     results = {
+         "total_params": total_params,
+         "param_types": {},
+         "device_info": {
+             "device": str(next(model.parameters()).device),
+             "cuda_available": torch.cuda.is_available(),
+         },
+     }
+
+     for dtype in param_types:
+         count = param_type_counts[dtype]
+         percentage = (count / total_params) * 100
+         # torch.finfo only covers floating-point dtypes; fall back to
+         # torch.iinfo for integer parameters (e.g. quantized weights).
+         bits = torch.finfo(dtype).bits if dtype.is_floating_point else torch.iinfo(dtype).bits
+         memory_bytes = count * bits // 8
+         memory_mb = memory_bytes / (1024 * 1024)
+
+         results["param_types"][str(dtype)] = {
+             "count": count,
+             "percentage": percentage,
+             "memory_mb": memory_mb,
+         }
+
+     if torch.cuda.is_available():
+         results["device_info"].update({
+             "cuda_device": torch.cuda.get_device_name(0),
+             "cuda_memory_allocated_mb": torch.cuda.memory_allocated(0) / 1024**2,
+             "cuda_memory_cached_mb": torch.cuda.memory_reserved(0) / 1024**2,
+         })
+
+     return results
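
A small usage sketch for `analyze_model_parameters`, runnable against any `torch.nn.Module` (the toy layer here is a stand-in for the CLIP model):

```
import torch

from src.utils import analyze_model_parameters

model = torch.nn.Linear(512, 256)  # toy stand-in for the real model
report = analyze_model_parameters(model)

print(report["total_params"])      # 512 * 256 + 256 = 131328
print(report["param_types"]["torch.float32"]["memory_mb"])  # ~0.5 MB
```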