kohlin committed
Commit 88f73ef · 0 Parent(s)

Initial commit

Files changed (5)
  1. .github/workflows/ci.yml +37 -0
  2. .gitignore +3 -0
  3. Dockerfile +23 -0
  4. main.py +51 -0
  5. requirements.txt +6 -0
.github/workflows/ci.yml ADDED
@@ -0,0 +1,37 @@
+name: CI/CD Pipeline
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+          python -m spacy download en_core_web_sm
+
+      - name: Build Docker image
+        run: |
+          docker build -t kohlin/nlp-project:latest .
+
+      - name: Push Docker image
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
+        run: |
+          echo $DOCKERHUB_PASSWORD | docker login -u $DOCKERHUB_USERNAME --password-stdin
+          docker push kohlin/nlp-project:latest
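Note that the final step logs in with --password-stdin, so the Docker Hub password never appears in the shell's argument list or the job log; both credentials come from repository secrets rather than being hard-coded in the workflow file.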
.gitignore ADDED
@@ -0,0 +1,3 @@
+__pycache__/
+*.pyc
+.env
Dockerfile ADDED
@@ -0,0 +1,23 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY requirements.txt .
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download the spaCy English model
+RUN python -m spacy download en_core_web_sm
+
+# Copy the rest of the working directory contents into the container at /app
+COPY . .
+
+# Expose port 80 for the container
+EXPOSE 80
+
+# Run the FastAPI app with uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
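Copying requirements.txt and installing dependencies before COPY . . lets Docker cache the dependency and spaCy-model layers, so edits to the application code do not force a reinstall of the packages or a re-download of the model on rebuild.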
main.py ADDED
@@ -0,0 +1,51 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+import spacy
+import time
+from langdetect import detect
+from transformers import BertTokenizer, BertModel
+
+app = FastAPI(title="Text Processing API")
+
+# Load models only once (at startup)
+nlp = spacy.load("en_core_web_sm")
+tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
+model = BertModel.from_pretrained('bert-base-multilingual-uncased')
+
+def process_text(text: str):
+    # Detect language
+    lang = detect(text)
+
+    # Start timer
+    start_time = time.time()
+
+    # Process text with spaCy for NER and tokenization
+    doc = nlp(text)
+    tokens = [token.text for token in doc]
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+
+    # BERT embedding (showcasing the operation)
+    encoded_input = tokenizer(text, return_tensors='pt')
+    output = model(**encoded_input)
+
+    # Calculate time taken
+    end_time = time.time()
+    time_taken = end_time - start_time
+
+    return {
+        "language": lang,
+        "tokens": tokens,
+        "named_entities": entities,
+        "query_length": len(text),
+        "time_taken": time_taken
+    }
+
+# Define request body model
+class Query(BaseModel):
+    text: str
+
+# FastAPI endpoint to process text
+@app.post("/process/")
+async def process_query(query: Query):
+    results = process_text(query.text)
+    return results
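For a quick smoke test of the endpoint, here is a minimal client sketch. It assumes the container is running with the port published (e.g. docker run -p 80:80 kohlin/nlp-project:latest) and that the requests package is installed on the client side; requests is not part of requirements.txt.

import requests

# Send a query to the /process/ endpoint defined in main.py.
resp = requests.post(
    "http://localhost:80/process/",
    json={"text": "Berlin is the capital of Germany."},
)
resp.raise_for_status()

result = resp.json()
# The response mirrors process_text(): language, tokens,
# named_entities, query_length, and time_taken. JSON turns the
# entity tuples into two-element lists.
print(result["language"])        # e.g. 'en'
print(result["named_entities"])  # e.g. [['Berlin', 'GPE'], ['Germany', 'GPE']]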
requirements.txt ADDED
@@ -0,0 +1,6 @@
+fastapi
+uvicorn
+spacy
+langdetect
+transformers
+torch
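The dependencies are left unpinned, so the CI job and the Docker build will install whatever the latest releases are at build time; pinning exact versions (for example from pip freeze output) would make the image reproducible.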