Yassine committed on
Commit
05deaf1
·
1 Parent(s): 0f8454b

Initial commit: FastAPI application

Browse files
Files changed (4) hide show
  1. Dockerfile +14 -0
  2. README.md +19 -5
  3. main.py +192 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI NLP service.
FROM python:3.9

# Run as an unprivileged user (UID 1000) instead of root.
RUN useradd -m -u 1000 user
USER user
# Packages installed by the non-root user put their console scripts
# (e.g. uvicorn) under ~/.local/bin, so make that directory resolvable.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# main.py calls spacy.load('fr_core_news_lg'). The model is a separate package
# that requirements.txt does not list, so it must be fetched at build time;
# without it the application fails on startup.
RUN python -m spacy download fr_core_news_lg

COPY --chown=user . /app

# Serve the ASGI app on all interfaces, port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,24 @@
1
  ---
2
- title: Plan Genie Ai
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Plan Genie AI
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ # Plan Genie AI
11
+
12
+ A FastAPI-based NLP service for task analysis and entity extraction.
13
+
14
+ ## Features
15
+
16
+ - Text type classification
17
+ - Named Entity Recognition (NER)
18
+ - Entity extraction and analysis
19
+
20
+ ## API Endpoints
21
+
22
+ - `/predict-type/`: Classify the type of text
23
+ - `/extract-entities/`: Extract named entities from text
24
+ - `/analyze-text/`: Combined analysis of text type and entities
main.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Body
2
+ import torch
3
+ import spacy
4
+ import os
5
+ from pathlib import Path
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification
8
+ from pydantic import BaseModel
9
+
10
+ # Define input model
11
+
12
+
13
class TextInput(BaseModel):
    """Request body accepted by the POST endpoints: one free-text field."""

    # Raw text to classify and/or scan for entities.
    text: str
15
+
16
+
17
# Initialize FastAPI
app = FastAPI()

# CORS configuration.
# Allowed origins can be restricted to a specific frontend through the
# optional ALLOWED_ORIGINS environment variable (comma-separated list).
# When it is unset or empty, every origin is allowed, as before.
_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    # The CORS rules do not permit a wildcard origin on credentialed requests,
    # so credentials are only enabled once explicit origins are configured.
    allow_credentials="*" not in _allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
29
+
30
# Get base directory
base_dir = Path(__file__).parent.absolute()

# Your Hugging Face Hub username
HF_USERNAME = "YassineJedidi"  # Replace with your actual username

# Model resolution order:
#   1. fine-tuned repositories on the Hugging Face Hub,
#   2. local copies under ./models,
#   3. the plain "camembert-base" checkpoint (last resort, see warning below).
try:
    print("Loading models from Hugging Face Hub")

    # Model repositories on Hugging Face
    tokenizer_repo = f"{HF_USERNAME}/tasks-tokenizer"
    ner_model_repo = f"{HF_USERNAME}/tasks-ner"
    type_model_repo = f"{HF_USERNAME}/tasks-type"

    print(f"Loading tokenizer from: {tokenizer_repo}")
    print(f"Loading NER model from: {ner_model_repo}")
    print(f"Loading type model from: {type_model_repo}")

    # Load models from Hugging Face Hub
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_repo)
    ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_repo)
    type_model = AutoModelForSequenceClassification.from_pretrained(
        type_model_repo)

except Exception as hub_error:
    print(f"Error loading models from Hugging Face Hub: {hub_error}")

    # Fallback to local files if available
    try:
        # Forward-slash paths regardless of the host operating system.
        tokenizer_path = (base_dir / "models" / "tasks-tokenizer").as_posix()
        ner_model_path = (base_dir / "models" / "tasks-ner").as_posix()
        # NOTE(review): the local directory is "tasks-types" while the Hub
        # repository above is "tasks-type" - confirm which name is intended.
        type_model_path = (base_dir / "models" / "tasks-types").as_posix()

        print("Falling back to local models")
        print(f"Loading tokenizer from: {tokenizer_path}")
        print(f"Loading NER model from: {ner_model_path}")
        print(f"Loading type model from: {type_model_path}")

        # Load models from local files
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_path, local_files_only=True)
        ner_model = AutoModelForTokenClassification.from_pretrained(
            ner_model_path, local_files_only=True)
        type_model = AutoModelForSequenceClassification.from_pretrained(
            type_model_path, local_files_only=True)

    except Exception as local_error:
        print(f"Error loading local models: {local_error}")
        # Fallback to base model from HuggingFace
        print("Falling back to base CamemBERT model from HuggingFace Hub")
        # The base checkpoint is not fine-tuned for these tasks, so make the
        # degraded state obvious in the logs instead of failing silently.
        print("WARNING: base CamemBERT has no fine-tuned classification heads; "
              "type and entity predictions will not be meaningful")
        tokenizer = AutoTokenizer.from_pretrained("camembert-base")
        ner_model = AutoModelForTokenClassification.from_pretrained(
            "camembert-base")
        type_model = AutoModelForSequenceClassification.from_pretrained(
            "camembert-base")
90
+
91
# French spaCy pipeline; the entity endpoint uses it to split text into words.
nlp = spacy.load('fr_core_news_lg')

# Prefer the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move both models onto the selected device.
ner_model, type_model = ner_model.to(device), type_model.to(device)

# Class-index -> label-name mappings, read from each model's config.
id2label, id2type = ner_model.config.id2label, type_model.config.id2label
102
+
103
+
104
+ @app.get("/")
105
def root():
    """Liveness endpoint: return a fixed message showing the service is up."""
    status = {"message": "FastAPI NLP Model is running!"}
    return status
107
+
108
+
109
+ @app.post("/predict-type/")
110
async def predict_type(input_data: TextInput):
    """Classify the submitted text with the sequence-classification model.

    Returns:
        A dict with ``"type"`` (the label of the highest-scoring class) and
        ``"confidence"`` (the largest softmax probability).
    """
    encoded = tokenizer(
        input_data.text,
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = type_model(**encoded).logits

    best_class_id = torch.argmax(logits).item()
    best_label = id2type[best_class_id]
    probabilities = torch.softmax(logits, dim=1)

    return {
        "type": best_label,
        "confidence": torch.max(probabilities).item(),
    }
122
+
123
+
124
+ @app.post("/extract-entities/")
125
async def extract_entities(input_data: TextInput):
    """Extract named entities from the submitted text, grouped by entity type.

    The text is split into words with spaCy, the words are run through the
    token-classification model, and the label predicted for the first
    sub-token of each word is decoded as a BIO tag.

    Returns:
        ``{"entities": {entity_type: [entity_text, ...]}}`` where each entity
        text is its words joined with single spaces.

    Note:
        The tokenizer is called with ``truncation=True``, so words that fall
        beyond the model's maximum input length are not tagged.
    """
    words = [token.text for token in nlp(input_data.text)]
    if not words:
        # Nothing to tag: skip the tokenizer/model round trip entirely.
        return {"entities": {}}

    encoded = tokenizer(
        words,
        is_split_into_words=True,
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = ner_model(**encoded).logits

    # One predicted label per sub-token of the single sequence in the batch.
    label_ids = logits.argmax(dim=2)[0].tolist()
    labels = [id2label[label_id] for label_id in label_ids]

    return {"entities": _group_bio_entities(words, encoded.word_ids(0), labels)}


def _store_entity(grouped, entity_type, entity_words):
    """Append the open entity (if any) to ``grouped`` under its type."""
    if entity_type is not None:
        grouped.setdefault(entity_type, []).append(" ".join(entity_words))


def _group_bio_entities(words, word_ids, labels):
    """Group BIO-tagged words into ``{entity_type: [entity_text, ...]}``.

    Args:
        words: the original words, indexed by the values in ``word_ids``.
        word_ids: for each sub-token position, the index of the word it
            belongs to, or ``None`` for positions that map to no word.
        labels: the predicted label string for each sub-token position.
    """
    grouped = {}
    open_type = None   # type of the entity currently being built, if any
    open_words = []    # words collected so far for that entity

    for position, word_id in enumerate(word_ids):
        if word_id is None:
            continue
        # Only the first sub-token of each word decides that word's label.
        if position > 0 and word_ids[position - 1] == word_id:
            continue

        label = labels[position]
        prefix, entity_type = label[:2], label[2:]

        if prefix == "B-":
            # A new entity starts: close whatever was open.
            _store_entity(grouped, open_type, open_words)
            open_type, open_words = entity_type, [words[word_id]]
        elif prefix == "I-" and open_type is not None and entity_type == open_type:
            open_words.append(words[word_id])
        else:
            # Any other label (including an "I-" tag that does not continue
            # the open entity) ends the current entity.
            _store_entity(grouped, open_type, open_words)
            open_type, open_words = None, []

    # Flush an entity that runs to the end of the sequence.
    _store_entity(grouped, open_type, open_words)
    return grouped
179
+
180
+
181
+ @app.post("/analyze-text/")
182
async def analyze_text(input_data: TextInput):
    """Run type classification and entity extraction on the same text.

    Returns:
        A dict combining the ``"type"`` and ``"confidence"`` produced by
        ``predict_type`` with the ``"entities"`` produced by
        ``extract_entities``.
    """
    classification = await predict_type(input_data)
    extraction = await extract_entities(input_data)

    combined = {
        "type": classification["type"],
        "confidence": classification["confidence"],
        "entities": extraction["entities"],
    }
    return combined
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ torch==2.5.1
4
+ transformers==4.49.0
5
+ pydantic==2.9.2
6
+ safetensors==0.4.5
7
+ spacy==3.8.4