bearking58 committed
Commit cc8a450 · Parent(s): 3a42546

feat: add healthcheck endpoint + reconfigure ci cd

.github/workflows/ci-production.yml CHANGED
@@ -36,8 +36,11 @@ jobs:
         id: create_model
         run: |
           MODEL_ID=$(gcloud ai models upload \
+            --container-ports=8080 \
+            --container-predict-route="/predict" \
+            --container-health-route="/health" \
             --region=${{ secrets.GCP_VERTEX_AI_REGION }} \
-            --display-name="interview-ai-detector-model" \
+            --display-name=interview-ai-detector-model \
             --container-image-uri="${{ secrets.GCP_REPO_REGION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/interview-ai-detector/model-prediction:latest" \
             --format="value(model)")
           echo "MODEL_ID=${MODEL_ID}" >> $GITHUB_ENV
@@ -47,15 +50,15 @@ jobs:
         run: |
           ENDPOINT_ID=$(gcloud ai endpoints create \
             --region=${{ secrets.GCP_VERTEX_AI_REGION }} \
-            --display-name="interview-ai-detector-endpoint" \
+            --display-name=interview-ai-detector-endpoint \
             --format="value(name)")
           echo "ENDPOINT_ID=${ENDPOINT_ID}" >> $GITHUB_ENV

       - name: Deploy model to endpoint
         run: |
           gcloud ai endpoints deploy-model ${{ env.ENDPOINT_ID }} \
-            --region ${{ secrets.GCP_VERTEX_AI_REGION }} \
-            --model ${{ env.MODEL_ID }} \
-            --display-name interview-ai-detector-deployment \
-            --machine-type n1-standard-4 \
-            --accelerator count=1,type=nvidia-tesla-t4
+            --region=${{ secrets.GCP_VERTEX_AI_REGION }} \
+            --model=${{ env.MODEL_ID }} \
+            --display-name=interview-ai-detector-deployment \
+            --machine-type="n1-standard-4" \
+            --accelerator=count=1,type=nvidia-tesla-t4
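
For context (not part of the commit): once this workflow has uploaded the model and the deploy-model step has finished, the endpoint can be exercised from Python with the google-cloud-aiplatform client. A minimal sketch follows; the project, location, endpoint ID, and instance fields are placeholders rather than values taken from this repository, and the payload would have to match whatever PredictRequest defines in prediction.py.

# Minimal sketch of calling the deployed Vertex AI endpoint.
# Project, location, endpoint ID, and instance fields are assumed placeholders.
from google.cloud import aiplatform

aiplatform.init(project="my-gcp-project", location="us-central1")

# The endpoint ID would be the ENDPOINT_ID captured in the workflow above.
endpoint = aiplatform.Endpoint("1234567890")

# Vertex AI wraps this list as {"instances": [...]} and forwards it to the
# container's --container-predict-route, i.e. /predict.
response = endpoint.predict(instances=[{"answer": "sample text"}])  # hypothetical field name
print(response.predictions)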
Dockerfile CHANGED
@@ -13,5 +13,5 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Make port 8080 available to the world outside this container
 EXPOSE 8080

-# Run gunicorn with Uvicorn workers
-CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-w", "4", "-b", "0.0.0.0:8080", "prediction:app"]
+# Run uvicorn
+CMD ["uvicorn", "prediction:app", "--host", "0.0.0.0", "--port", "8080"]
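
Not part of the diff: with the CMD now starting uvicorn directly on port 8080, a quick local sanity check of a running container (assuming it was started with something like docker run -p 8080:8080 <image>) is to hit the same route the workflow passes to --container-health-route:

# Local smoke test; assumes the image is already running with port 8080 published.
import urllib.request

with urllib.request.urlopen("http://localhost:8080/health", timeout=5) as resp:
    assert resp.status == 200, f"unexpected status: {resp.status}"
    print("health check passed")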
prediction.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Response, status
 from pydantic import BaseModel
 from hypothesis import BaseModelHypothesis
 from random_forest_dependencies import RandomForestDependencies
@@ -24,6 +24,11 @@ class RequestModel(BaseModel):
     instances: List[PredictRequest]


+@app.get("/health")
+async def is_alive():
+    return Response(status_code=status.HTTP_200_OK)
+
+
 @app.post("/predict")
 async def predict(request: RequestModel):
     responses = [process_instance(data) for data in request.instances]
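
Not part of the diff: the new route can also be checked in-process with FastAPI's TestClient (which needs httpx installed; it is not listed in requirements.txt), roughly as follows:

# Sketch of an in-process check for the new /health route; assumes prediction.py
# and its local imports (hypothesis, random_forest_dependencies, ...) are importable.
from fastapi.testclient import TestClient

from prediction import app

client = TestClient(app)

def test_health_returns_200():
    response = client.get("/health")
    assert response.status_code == 200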
requirements.txt CHANGED
@@ -6,5 +6,4 @@ textstat
 scikit-learn==1.4.1.post1
 transformers
 fastapi
-uvicorn
-gunicorn
+uvicorn