benkada committed on
Commit
b0c5829
Β·
verified Β·
1 Parent(s): 1ece3c6

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +134 -51
main.py CHANGED
@@ -1,39 +1,22 @@
1
  import os
2
- import io
3
- from io import BytesIO
4
  from fastapi import FastAPI, UploadFile, File, Form
5
- from fastapi.responses import JSONResponse, HTMLResponse
6
  from fastapi.middleware.cors import CORSMiddleware
7
- from fastapi.staticfiles import StaticFiles
8
- from huggingface_hub import InferenceClient, login
9
  from PyPDF2 import PdfReader
10
  from docx import Document
11
  from PIL import Image
12
- from routers import ai # conservez vos routes annexes
13
-
14
- # ──────────────────────────────────────────────────────────────────────────────
15
- # 1) Authentification Hugging Face
16
- # ──────────────────────────────────────────────────────────────────────────────
17
- HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
18
- if not HF_TOKEN:
19
- raise RuntimeError(
20
- "Variable d'environnement HF_TOKEN absente ; crΓ©ez un jeton Β« Read Β» "
21
- "sur https://huggingface.co/settings/tokens et exportez-le (voir .env)."
22
- )
23
-
24
- login(token=HF_TOKEN) # Authentifie tout le process
25
-
26
- # ──────────────────────────────────────────────────────────────────────────────
27
- # 2) Configuration FastAPI
28
- # ──────────────────────────────────────────────────────────────────────────────
29
- PORT = int(os.getenv("PORT", 7860))
30
-
31
- app = FastAPI(
32
- title="AI Web App API",
33
- description="Backend API for AI-powered web application",
34
- version="1.0.0"
35
- )
36
 
 
 
 
 
 
 
 
37
  app.add_middleware(
38
  CORSMiddleware,
39
  allow_origins=["*"],
@@ -42,42 +25,142 @@ app.add_middleware(
42
  allow_headers=["*"],
43
  )
44
 
45
- app.mount("/", StaticFiles(directory=".", html=True), name="static")
46
- app.include_router(ai.router)
 
 
47
 
48
- # Clients HF (token passΓ© implicitement)
49
- summary_client = InferenceClient("facebook/bart-large-cnn")
50
- qa_client = InferenceClient("deepset/roberta-base-squad2")
51
- image_caption_client = InferenceClient("nlpconnect/vit-gpt2-image-captioning")
52
-
53
- # ──────────────────────────────────────────────────────────────────────────────
54
- # 3) Utils : extraction texte, routes API (inchangΓ©s ou presque)
55
- # ──────────────────────────────────────────────────────────────────────────────
56
  def extract_text_from_pdf(content: bytes) -> str:
 
57
  reader = PdfReader(io.BytesIO(content))
58
- return "\n".join(p.extract_text() or "" for p in reader.pages).strip()
 
 
 
59
 
60
  def extract_text_from_docx(content: bytes) -> str:
 
61
  doc = Document(io.BytesIO(content))
62
- return "\n".join(p.text for p in doc.paragraphs).strip()
 
 
63
 
64
  def process_uploaded_file(file: UploadFile) -> str:
65
  content = file.file.read()
66
- ext = file.filename.rsplit(".", 1)[-1].lower()
67
- if ext == "pdf":
 
68
  return extract_text_from_pdf(content)
69
- if ext == "docx":
70
  return extract_text_from_docx(content)
71
- if ext == "txt":
72
  return content.decode("utf-8").strip()
73
- raise ValueError("Type de fichier non supportΓ©")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- # … (gardez vos trois routes /analyze, /ask, /interpret_image identiques)
76
 
77
- @app.get("/api/health")
78
- async def health_check():
79
- return {"status": "healthy", "version": "1.0.0", "hf_token_set": True}
80
 
 
81
  if __name__ == "__main__":
82
  import uvicorn
83
- uvicorn.run(app, host="0.0.0.0", port=PORT)
 
1
  import os
 
 
2
  from fastapi import FastAPI, UploadFile, File, Form
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import JSONResponse, HTMLResponse
5
+ from huggingface_hub import InferenceClient
6
  from PyPDF2 import PdfReader
7
  from docx import Document
8
  from PIL import Image
9
+ import io
10
+ from io import BytesIO
11
+ import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
# Hugging Face access token. Supply it through the HF_TOKEN environment
# variable (recommended over hard-coding credentials in source).
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")

# FastAPI application instance.
app = FastAPI()
18
+
19
+ # Autoriser les requΓͺtes Cross-Origin
20
  app.add_middleware(
21
  CORSMiddleware,
22
  allow_origins=["*"],
 
25
  allow_headers=["*"],
26
  )
27
 
28
# Hugging Face Inference API clients — one per task, each authenticated
# with the same account token.
qa_client = InferenceClient(model="deepset/roberta-base-squad2", token=HUGGINGFACE_TOKEN)
image_caption_client = InferenceClient(model="nlpconnect/vit-gpt2-image-captioning", token=HUGGINGFACE_TOKEN)
summary_client = InferenceClient(model="facebook/bart-large-cnn", token=HUGGINGFACE_TOKEN)
32
 
33
+ # Text extraction helpers for uploaded files
 
 
 
 
 
 
 
34
def extract_text_from_pdf(content: bytes) -> str:
    """Return the concatenated text of every page of a PDF.

    Args:
        content: Raw PDF file bytes.

    Returns:
        Page texts joined by newlines and stripped; pages that yield no
        extractable text are skipped.
    """
    reader = PdfReader(io.BytesIO(content))
    # Call extract_text() once per page (the original called it twice)
    # and join at the end instead of quadratic string concatenation.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text).strip()
41
 
42
def extract_text_from_docx(content: bytes) -> str:
    """Return the text of all paragraphs of a DOCX document.

    Args:
        content: Raw DOCX file bytes.

    Returns:
        Paragraph texts joined by newlines and stripped (empty paragraphs
        become blank lines, as before).
    """
    doc = Document(io.BytesIO(content))
    # join() replaces the original quadratic += string building.
    return "\n".join(para.text for para in doc.paragraphs).strip()
48
 
49
def process_uploaded_file(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    Args:
        file: The uploaded file; its filename extension selects the parser.

    Returns:
        The extracted text, stripped.

    Raises:
        ValueError: If the file has no name or an unsupported extension.
    """
    content = file.file.read()

    # FastAPI allows filename to be None; the original would crash with
    # AttributeError on .split() — report it as an unsupported type instead.
    if not file.filename:
        raise ValueError("Type de fichier non supporté")

    # rsplit handles names containing several dots the same way but makes
    # the "last extension" intent explicit.
    extension = file.filename.rsplit(".", 1)[-1].lower()

    if extension == "pdf":
        return extract_text_from_pdf(content)
    elif extension == "docx":
        return extract_text_from_docx(content)
    elif extension == "txt":
        return content.decode("utf-8").strip()
    else:
        raise ValueError("Type de fichier non supporté")
61
+
62
# Serve the single-page frontend at the site root.
@app.get("/", response_class=HTMLResponse)
async def serve_homepage():
    """Return the contents of index.html as the homepage."""
    with open("index.html", "r", encoding="utf-8") as homepage:
        markup = homepage.read()
    return HTMLResponse(content=markup, status_code=200)
67
+
68
# Document summarisation endpoint.
@app.post("/analyze")
async def analyze_file(file: UploadFile = File(...)):
    """Summarise the text of an uploaded document.

    Only the first 3000 characters are sent to the model; very short
    documents are rejected with an explanatory message.
    """
    try:
        text = process_uploaded_file(file)
        if len(text) < 20:
            # Too little content for the model to summarise usefully.
            return {"summary": "Document trop court pour être résumé."}
        return {"summary": summary_client.summarization(text[:3000])}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": f"Erreur lors de l'analyse: {str(e)}"})
82
+
83
# Question answering over a document or an image caption.
@app.post("/ask")
async def ask_question(file: UploadFile = File(...), question: str = Form(...)):
    """Answer a question about an uploaded file.

    Images are captioned first and the caption becomes the QA context;
    other files are converted to text (first 3000 characters).

    Args:
        file: Uploaded image or document.
        question: The question to answer against the file's content.

    Returns:
        {"answer": ...} on success, or a 500 JSONResponse with an error.
    """
    try:
        # content_type may be None for some clients; the original crashed
        # on .startswith() in that case — treat it as a non-image.
        content_type = file.content_type or ""
        if content_type.startswith("image/"):
            image_bytes = await file.read()
            image_pil = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            # Bound the size before sending to the captioning model.
            image_pil.thumbnail((1024, 1024))

            img_byte_arr = BytesIO()
            image_pil.save(img_byte_arr, format='JPEG')
            img_byte_arr = img_byte_arr.getvalue()

            # Caption the image; the result shape varies across hub versions.
            result = image_caption_client.image_to_text(img_byte_arr)
            if isinstance(result, dict):
                context = result.get("generated_text") or result.get("caption") or ""
            elif isinstance(result, list) and result:
                first = result[0]
                # List items may be dicts or plain strings; the original
                # assumed dicts and crashed on strings.
                if isinstance(first, dict):
                    context = first.get("generated_text", "")
                else:
                    context = str(first)
            elif isinstance(result, str):
                context = result
            else:
                context = ""

        else:
            # Not an image: process as a text document.
            text = process_uploaded_file(file)
            if len(text) < 20:
                return {"answer": "Document trop court pour répondre à la question."}
            context = text[:3000]

        if not context:
            return {"answer": "Aucune information disponible pour répondre à la question."}

        result = qa_client.question_answering(question=question, context=context)
        return {"answer": result.get("answer", "Aucune réponse trouvée.")}

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": f"Erreur lors de la recherche de réponse: {str(e)}"})
124
+
125
# Image captioning endpoint.
@app.post("/interpret_image")
async def interpret_image(image: UploadFile = File(...)):
    """Generate a natural-language description of an uploaded image.

    Args:
        image: The uploaded image file.

    Returns:
        {"description": ...} on success, or a 500 JSONResponse with an error.
    """
    try:
        image_bytes = await image.read()

        # Normalise: force RGB and bound the size before inference.
        image_pil = Image.open(io.BytesIO(image_bytes))
        image_pil = image_pil.convert("RGB")
        image_pil.thumbnail((1024, 1024))

        # Re-encode as JPEG bytes for the inference client.
        img_byte_arr = BytesIO()
        image_pil.save(img_byte_arr, format='JPEG')
        img_byte_arr = img_byte_arr.getvalue()

        result = image_caption_client.image_to_text(img_byte_arr)

        # Raw model output, kept for debugging.
        print("Résultat brut du modèle image-to-text:", result)

        # The client may return a dict, a list (of dicts OR strings), or a
        # bare string depending on the model/hub version.
        if isinstance(result, dict):
            description = result.get("generated_text") or result.get("caption") or "Description non trouvée."
        elif isinstance(result, list) and result:
            first = result[0]
            if isinstance(first, dict):
                description = first.get("generated_text", "Description non trouvée.")
            else:
                # The original called .get() here and turned valid string
                # captions into a 500 via the except clause.
                description = str(first)
        elif isinstance(result, str):
            description = result
        else:
            description = "Description non trouvée."

        return {"description": description}

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": f"Erreur lors de l'interprétation de l'image: {str(e)}"})
 
162
 
163
# Local development entry point.
if __name__ == "__main__":
    import uvicorn

    # Honour the PORT environment variable (as the previous revision did,
    # and as hosting platforms expect), defaulting to the former
    # hard-coded 7860.
    uvicorn.run("main:app", host="0.0.0.0", port=int(os.getenv("PORT", 7860)), reload=True)