ikraamkb committed on
Commit 0b363e7
verified
1 Parent(s): 0f8e09c

Update app.py

Files changed (1)
app.py  +30  -37

app.py CHANGED
@@ -5,26 +5,32 @@ from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
-from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
-import gradio as gr
 import torch
+from transformers import pipeline
+import gradio as gr
 import numpy as np
 
 # Initialize FastAPI
 app = FastAPI()
 
-print(f"πŸ”„ Loading models")
+print(f"πŸ”„ Loading models (Running on GPU: {torch.cuda.is_available()})")
 
 # Load Hugging Face Models
-
-doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
-
-# Load Image Captioning Model
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-model = model.to(dtype=torch.float16)  # Quantizing to FP16
+doc_qa_pipeline = pipeline(
+    "text-generation",
+    model="Qwen/Qwen2.5-VL-7B-Instruct",
+    device=0 if torch.cuda.is_available() else -1
+)
 
-print("βœ… Models loaded")
+image_captioning_pipeline = pipeline(
+    "image-to-text",
+    model="Salesforce/blip-image-captioning-base",
+    device=0 if torch.cuda.is_available() else -1,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    use_fast=True
+)
+
+print("βœ… Models loaded successfully")
 
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
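
Note (not part of the commit): the manual BlipProcessor / BlipForConditionalGeneration setup above is replaced by two transformers.pipeline calls. A minimal, self-contained sketch of exercising the new captioning pipeline on its own, where "sample.jpg" is a placeholder path:

    import torch
    from PIL import Image
    from transformers import pipeline

    # Same captioning pipeline as in the new app.py, minus the dtype/use_fast options
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=0 if torch.cuda.is_available() else -1,
    )

    # The pipeline accepts a PIL image (or a file path / URL) and returns a list of dicts
    image = Image.open("sample.jpg").convert("RGB")  # placeholder image path
    print(captioner(image)[0]["generated_text"])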
@@ -73,31 +79,20 @@ def extract_text_from_excel(excel_bytes):
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
 
-def answer_question_from_document(file, question: str):
+def answer_question_from_document(file: UploadFile, question: str):
     print("πŸ“‚ Processing document for QA...")
+    validation_error = validate_file_type(file)
+    if validation_error:
+        return validation_error
 
-    # Ensure file is not None
-    if not file:
-        return "❌ No file uploaded."
-
-    ext = file.name.split(".")[-1].lower()
-    print(f"πŸ” Validating file type: {ext}")
-    if ext not in ALLOWED_EXTENSIONS:
-        return f"❌ Unsupported file format: {ext}"
-
-    # Read file contents
-    try:
-        with open(file.name, "rb") as f:
-            file_bytes = f.read()
-    except Exception as e:
-        return f"❌ Error reading file: {str(e)}"
+    file_ext = file.filename.split(".")[-1].lower()
+    file_bytes = file.file.read()
 
-    # Extract text based on file type
-    if ext == "pdf":
+    if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
-    elif ext in ["docx", "pptx"]:
+    elif file_ext in ["docx", "pptx"]:
         text = extract_text_with_tika(file_bytes)
-    elif ext == "xlsx":
+    elif file_ext == "xlsx":
         text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
@@ -106,8 +101,8 @@ def answer_question_from_document(file, question: str):
         return "⚠️ No text extracted from the document."
 
     truncated_text = truncate_text(text)
-    print("πŸ€– Generating response...")
-    response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
+    print("πŸ€– Generating response with Qwen2.5-VL-7B...")
+    response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}", max_length=100)
 
     return response[0]["generated_text"]
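
Note (not part of the commit): with a transformers text-generation pipeline, generated_text includes the prompt by default and max_length counts prompt plus completion tokens. An alternative call that caps only the new tokens and strips the echoed prompt could look like this (doc_qa_pipeline, question and truncated_text are the names used in app.py above):

    # Illustrative alternative to the committed max_length=100 call
    response = doc_qa_pipeline(
        f"Question: {question}\nContext: {truncated_text}",
        max_new_tokens=100,      # cap only the completion, not prompt + completion
        return_full_text=False,  # drop the echoed prompt from generated_text
    )
    answer = response[0]["generated_text"]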
 
@@ -118,12 +113,10 @@ def answer_question_from_image(image, question):
         image = Image.fromarray(image)  # Convert to PIL Image
 
         print("πŸ–ΌοΈ Generating caption for image...")
-        inputs = processor(images=image, return_tensors="pt", use_fast=True).to(dtype=torch.float16)
-        output = model.generate(**inputs)
-        caption = processor.decode(output[0], skip_special_tokens=True)
+        caption = image_captioning_pipeline(image)[0]['generated_text']
 
-        print("πŸ€– Answering question based on caption...")
-        response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
+        print("πŸ€– Answering question based on caption with Qwen2.5-VL-7B...")
+        response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}", max_length=100)
 
         return response[0]["generated_text"]
     except Exception as e:
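
Note (not part of the commit): a hypothetical local smoke test of the caption-then-answer flow, using the image_captioning_pipeline and doc_qa_pipeline globals defined in app.py above; "sample.png" and the question text are placeholders:

    import numpy as np
    from PIL import Image

    # Gradio hands images to the handler as numpy arrays, so mimic that here
    array = np.array(Image.open("sample.png").convert("RGB"))  # placeholder image path

    caption = image_captioning_pipeline(Image.fromarray(array))[0]["generated_text"]
    prompt = f"Question: What is shown in this picture?\nContext: {caption}"
    print(doc_qa_pipeline(prompt, max_length=100)[0]["generated_text"])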
 