kohlin committed
Commit 978a5b4 · 0 Parent(s)

Initial Commit

.github/workflows/ci-cd.yml ADDED
@@ -0,0 +1,63 @@
+ name: CI/CD Pipeline
+
+ on:
+   push:
+     branches:
+       - feature/*
+       - develop
+       - main
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+     steps:
+       - name: Checkout Code
+         uses: actions/checkout@v3
+
+       - name: Set up Python
+         uses: actions/setup-python@v3
+         with:
+           python-version: '3.9'
+
+       - name: Install Dependencies
+         run: pip install -r requirements.txt
+
+       - name: Run Tests
+         run: python -m unittest discover -s tests
+
+   merge-to-develop:
+     needs: build
+     runs-on: ubuntu-latest
+     # '==' does not glob, so match any feature/* branch with startsWith
+     if: startsWith(github.ref, 'refs/heads/feature/')
+     steps:
+       - name: Checkout Code
+         uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+
+       - name: Merge feature branch to develop
+         run: |
+           git fetch origin
+           git checkout develop
+           git merge --no-ff origin/${GITHUB_REF#refs/heads/}
+           git push origin develop
+
+   merge-to-main:
+     # needs build, not merge-to-develop: merge-to-develop is skipped on
+     # pushes to develop, and a skipped dependency would skip this job too
+     needs: build
+     runs-on: ubuntu-latest
+     if: github.ref == 'refs/heads/develop'
+     steps:
+       - name: Checkout Code
+         uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+
+       - name: Merge develop branch to main
+         run: |
+           git fetch origin
+           git checkout main
+           git merge --no-ff origin/develop
+           git push origin main
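Both merge jobs push back to the repository with the workflow's own credentials. If the default GITHUB_TOKEN in this repository is read-only, a top-level permissions block would also be needed; a minimal sketch (an assumption about the repo's settings, not part of the committed file):

    # grant the workflow's token push access (assumed addition)
    permissions:
      contents: write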
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,31 @@
+ name: Deploy to Hugging Face
+
+ on:
+   push:
+     branches:
+       - main
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v3
+         with:
+           # full history: pushing a shallow clone to the Space would fail
+           fetch-depth: 0
+
+       - name: Set up Python
+         uses: actions/setup-python@v3
+         with:
+           python-version: '3.9'
+
+       - name: Push to Hugging Face
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: |
+           git config --global user.email "[email protected]"
+           git config --global user.name "nkofficial-1005"
+           git remote add hf https://kohlin:[email protected]/spaces/kohlin/nlp-project
+           git push hf main
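The deploy job assumes a repository secret named HF_TOKEN holding a Hugging Face access token with write access to the Space. One way to create it, assuming the GitHub CLI is installed and authenticated:

    # store the token as a repository secret (token value is a placeholder)
    gh secret set HF_TOKEN --body "<your-hugging-face-token>"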
.github/workflows/docker-build.yml ADDED
@@ -0,0 +1,30 @@
+ name: Docker Build and Push
+
+ on:
+   push:
+     branches:
+       - main
+
+ jobs:
+   build-and-push:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v3
+
+       - name: Set up Docker Buildx
+         uses: docker/setup-buildx-action@v2
+
+       - name: Log in to Docker Hub
+         uses: docker/login-action@v2
+         with:
+           username: ${{ secrets.DOCKER_USERNAME }}
+           password: ${{ secrets.DOCKER_PASSWORD }}
+
+       - name: Build and Push Docker Image
+         uses: docker/build-push-action@v4
+         with:
+           context: .
+           push: true
+           tags: kohlin/nlp-project:latest
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.9
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ # document the Gradio port that app.py listens on
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,19 @@
+ import gradio as gr
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
+
+ # Load fine-tuned model
+ model_path = "./ner_model"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForTokenClassification.from_pretrained(model_path)
+
+ # Create NER pipeline
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
+
+ def ner_prediction(text):
+     entities = ner_pipeline(text)
+     return {e["word"]: e["entity"] for e in entities}
+
+ # Gradio UI: "json" output, since the "label" component expects
+ # {class: confidence} floats rather than word -> entity strings
+ iface = gr.Interface(fn=ner_prediction, inputs="text", outputs="json")
+ iface.launch(server_name="0.0.0.0", server_port=7860)
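Without an aggregation strategy, the pipeline returns one dict per sub-token with word, entity, and score keys. A quick sketch for inspecting the raw output locally (assumes ./ner_model has already been produced by train.py; the sample sentence is illustrative):

    # print each recognized token with its tag and confidence
    for ent in ner_pipeline("Hugging Face is based in New York"):
        print(ent["word"], ent["entity"], round(float(ent["score"]), 3))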
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ transformers
+ datasets
+ torch
+ seqeval
+ gradio
+ fastapi
+ uvicorn
train.py ADDED
@@ -0,0 +1,89 @@
+ import numpy as np
+ import torch
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForTokenClassification,
+     DataCollatorForTokenClassification,
+     TrainingArguments,
+     Trainer,
+ )
+ from datasets import load_dataset, load_metric
+
+ # Load dataset
+ dataset = load_dataset("conll2003")
+ label_list = dataset["train"].features["ner_tags"].feature.names
+
+ # Load tokenizer
+ model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+
+ # Tokenize the dataset and align the word-level NER tags with the subword
+ # tokens; special tokens and continuation subwords get -100 so the loss ignores them
+ def tokenize_and_align_labels(examples):
+     tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
+     labels = []
+     for i, tags in enumerate(examples["ner_tags"]):
+         word_ids = tokenized_inputs.word_ids(batch_index=i)
+         previous_word_id = None
+         label_ids = []
+         for word_id in word_ids:
+             if word_id is None or word_id == previous_word_id:
+                 label_ids.append(-100)
+             else:
+                 label_ids.append(tags[word_id])
+             previous_word_id = word_id
+         labels.append(label_ids)
+     tokenized_inputs["labels"] = labels
+     return tokenized_inputs
+
+ tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)
+
+ # Load model
+ model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=9)
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./ner_model",
+     evaluation_strategy="epoch",
+     save_strategy="epoch",
+     learning_rate=2e-5,
+     per_device_train_batch_size=16,
+     per_device_eval_batch_size=16,
+     num_train_epochs=3,
+     weight_decay=0.01,
+ )
+
+ # Load metric
+ metric = load_metric("seqeval")
+
+ # seqeval expects label strings, not ids; drop the -100 positions
+ def compute_metrics(eval_pred):
+     predictions, labels = eval_pred
+     predictions = np.argmax(predictions, axis=-1)
+     true_predictions = [
+         [label_list[p] for p, l in zip(pred, lab) if l != -100]
+         for pred, lab in zip(predictions, labels)
+     ]
+     true_labels = [
+         [label_list[l] for p, l in zip(pred, lab) if l != -100]
+         for pred, lab in zip(predictions, labels)
+     ]
+     return metric.compute(predictions=true_predictions, references=true_labels)
+
+ # Trainer (the data collator pads inputs and labels to the batch max length)
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_datasets["train"],
+     eval_dataset=tokenized_datasets["validation"],
+     tokenizer=tokenizer,
+     data_collator=DataCollatorForTokenClassification(tokenizer),
+     compute_metrics=compute_metrics,
+ )
+
+ # Train model
+ trainer.train()
+
+ # Save model
+ trainer.save_model("./ner_model")
+ tokenizer.save_pretrained("./ner_model")
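Since conll2003 also ships a test split, a final held-out evaluation can follow training; a one-line sketch using the objects defined above:

    # optional: score the trained model on the held-out test split
    print(trainer.evaluate(eval_dataset=tokenized_datasets["test"]))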