kohlin committed
Commit 88f73ef · 0 Parent(s)

Initial commit

Files changed (5)
  1. .github/workflows/ci.yml +37 -0
  2. .gitignore +3 -0
  3. Dockerfile +23 -0
  4. main.py +51 -0
  5. requirements.txt +6 -0
.github/workflows/ci.yml ADDED
@@ -0,0 +1,37 @@
+name: CI/CD Pipeline
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+          python -m spacy download en_core_web_sm
+
+      - name: Build Docker image
+        run: |
+          docker build -t kohlin/nlp-project:latest .
+
+      - name: Push Docker image
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
+        run: |
+          echo $DOCKERHUB_PASSWORD | docker login -u $DOCKERHUB_USERNAME --password-stdin
+          docker push kohlin/nlp-project:latest
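Note that the final step logs in with --password-stdin, so the Docker Hub password never appears in the shell's argument list or the job log; both credentials come from repository secrets rather than being hard-coded in the workflow file.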
.gitignore ADDED
@@ -0,0 +1,3 @@
+__pycache__/
+*.pyc
+.env
Dockerfile ADDED
@@ -0,0 +1,23 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY requirements.txt .
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download the spaCy English model
+RUN python -m spacy download en_core_web_sm
+
+# Copy the rest of the working directory contents into the container at /app
+COPY . .
+
+# Expose port 80 for the container
+EXPOSE 80
+
+# Run the FastAPI app with uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
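Copying requirements.txt and installing dependencies before COPY . . lets Docker cache the dependency and spaCy-model layers, so edits to the application code do not force a reinstall of the packages or a re-download of the model on rebuild.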
main.py ADDED
@@ -0,0 +1,51 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+import spacy
+import time
+from langdetect import detect
+from transformers import BertTokenizer, BertModel
+
+app = FastAPI(title="Text Processing API")
+
+# Load models only once (at startup)
+nlp = spacy.load("en_core_web_sm")
+tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
+model = BertModel.from_pretrained('bert-base-multilingual-uncased')
+
+def process_text(text: str):
+    # Detect language
+    lang = detect(text)
+
+    # Start timer
+    start_time = time.time()
+
+    # Process text with spaCy for NER and tokenization
+    doc = nlp(text)
+    tokens = [token.text for token in doc]
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+
+    # BERT embedding (showcasing the operation)
+    encoded_input = tokenizer(text, return_tensors='pt')
+    output = model(**encoded_input)
+
+    # Calculate time taken
+    end_time = time.time()
+    time_taken = end_time - start_time
+
+    return {
+        "language": lang,
+        "tokens": tokens,
+        "named_entities": entities,
+        "query_length": len(text),
+        "time_taken": time_taken
+    }
+
+# Define request body model
+class Query(BaseModel):
+    text: str
+
+# FastAPI endpoint to process text
+@app.post("/process/")
+async def process_query(query: Query):
+    results = process_text(query.text)
+    return results
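For a quick smoke test of the endpoint, here is a minimal client sketch. It assumes the container is running with the port published (e.g. docker run -p 80:80 kohlin/nlp-project:latest) and that the requests package is installed on the client side; requests is not part of requirements.txt.

import requests

# Send a query to the /process/ endpoint defined in main.py.
resp = requests.post(
    "http://localhost:80/process/",
    json={"text": "Berlin is the capital of Germany."},
)
resp.raise_for_status()

result = resp.json()
# The response mirrors process_text(): language, tokens,
# named_entities, query_length, and time_taken. JSON turns the
# entity tuples into two-element lists.
print(result["language"])        # e.g. 'en'
print(result["named_entities"])  # e.g. [['Berlin', 'GPE'], ['Germany', 'GPE']]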
requirements.txt ADDED
@@ -0,0 +1,6 @@
+fastapi
+uvicorn
+spacy
+langdetect
+transformers
+torch
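The dependencies are left unpinned, so the CI job and the Docker build will install whatever the latest releases are at build time; pinning exact versions (for example from pip freeze output) would make the image reproducible.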