ikraamkb committed on
Commit 01cb6f1 Β· verified Β· 1 Parent(s): 26f0a9c

Update app.py

Files changed (1):
  1. app.py +115 -123
app.py CHANGED
@@ -132,138 +132,130 @@ def home():
      return RedirectResponse(url="/")
  """
  
- from fastapi import FastAPI, File, UploadFile
- import pymupdf as fitz  # PyMuPDF for PDF parsing
- import openpyxl
- from pptx import Presentation
- import torch
- from torchvision import transforms
- from torchvision.models.detection import fasterrcnn_resnet50_fpn
- from PIL import Image
- from transformers import pipeline
  import gradio as gr
  from fastapi.responses import RedirectResponse
- import numpy as np
- import docx
-
- # Initialize FastAPI
- print("πŸš€ FastAPI server is starting...")
- app = FastAPI()
-
- # Load AI Model for Question Answering (DeepSeek-V2-Chat)
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- # Preload Hugging Face model
- print(f"πŸ”„ Loading models")
- qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
-
- # Load Pretrained Object Detection Model (Torchvision)
- from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
- weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
- model = fasterrcnn_resnet50_fpn(weights=weights)
- model.eval()
-
- # Image Transformations
- transform = transforms.Compose([
-     transforms.ToTensor()
- ])
  
- # Allowed File Extensions
- ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
  
- def validate_file_type(file):
-     ext = file.name.split(".")[-1].lower()
-     print(f"πŸ” Validating file type: {ext}")
-     if ext not in ALLOWED_EXTENSIONS:
-         return f"❌ Unsupported file format: {ext}"
-     return None
-
- # Function to truncate text to 450 tokens
- def truncate_text(text, max_tokens=450):
-     words = text.split()
-     truncated = " ".join(words[:max_tokens])
-     print(f"βœ‚οΈ Truncated text to {max_tokens} tokens.")
-     return truncated
-
- # Document Text Extraction Functions
- def extract_text_from_pdf(pdf_file):
-     try:
-         print("πŸ“„ Extracting text from PDF...")
-         doc = fitz.open(pdf_file)
-         text = "\n".join([page.get_text("text") for page in doc])
-         print("βœ… PDF text extraction completed.")
-         return text if text else "⚠️ No text found."
-     except Exception as e:
-         return f"❌ Error reading PDF: {str(e)}"
  
- def extract_text_from_docx(docx_file):
      try:
-         print("πŸ“ Extracting text from DOCX...")
-         doc = docx.Document(docx_file)
-         text = "\n".join([para.text for para in doc.paragraphs])
-         print("βœ… DOCX text extraction completed.")
-         return text if text else "⚠️ No text found."
-     except Exception as e:
-         return f"❌ Error reading DOCX: {str(e)}"
  
- def extract_text_from_pptx(pptx_file):
-     try:
-         print("πŸ“Š Extracting text from PPTX...")
-         ppt = Presentation(pptx_file)
-         text = []
-         for slide in ppt.slides:
-             for shape in slide.shapes:
-                 if hasattr(shape, "text"):
-                     text.append(shape.text)
-         print("βœ… PPTX text extraction completed.")
-         return "\n".join(text) if text else "⚠️ No text found."
      except Exception as e:
-         return f"❌ Error reading PPTX: {str(e)}"
-
- def extract_text_from_excel(excel_file):
-     try:
-         print("πŸ“Š Extracting text from Excel...")
-         wb = openpyxl.load_workbook(excel_file, read_only=True)
-         text = []
-         for sheet in wb.worksheets:
-             for row in sheet.iter_rows(values_only=True):
-                 text.append(" ".join(map(str, row)))
-         print("βœ… Excel text extraction completed.")
-         return "\n".join(text) if text else "⚠️ No text found."
-     except Exception as e:
-         return f"❌ Error reading Excel: {str(e)}"
-
- def answer_question_from_document(file, question):
-     print("πŸ“‚ Processing document for QA...")
-     validation_error = validate_file_type(file)
-     if validation_error:
-         return validation_error
-     file_ext = file.name.split(".")[-1].lower()
-     if file_ext == "pdf":
-         text = extract_text_from_pdf(file)
-     elif file_ext == "docx":
-         text = extract_text_from_docx(file)
-     elif file_ext == "pptx":
-         text = extract_text_from_pptx(file)
-     elif file_ext == "xlsx":
-         text = extract_text_from_excel(file)
-     else:
-         return "❌ Unsupported file format!"
-     if not text:
-         return "⚠️ No text extracted from the document."
-     truncated_text = truncate_text(text)
-     print("πŸ€– Generating response...")
-     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
-     print("βœ… AI response generated.")
-     return response[0]["generated_text"]
-
- print("βœ… Models loaded successfully.")
-
- doc_interface = gr.Interface(fn=answer_question_from_document, inputs=[gr.File(), gr.Textbox()], outputs="text")
-
- demo = gr.TabbedInterface([doc_interface], ["Document QA"])
- app = gr.mount_gradio_app(app, demo, path="/")
  
  @app.get("/")
  def home():
      return RedirectResponse(url="/")

      return RedirectResponse(url="/")
  """
  
+
  import gradio as gr
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from fastapi import FastAPI
+ from transformers import pipeline
  from fastapi.responses import RedirectResponse
+ import io
+ import ast
+ from PIL import Image
+ import re
  
+ # βœ… Load AI models
+ print("πŸš€ Initializing application...")
+ table_analyzer = pipeline("table-question-answering", model="facebook/tapas-large-finetuned-wtq", device=-1)
+ code_generator = pipeline("text-generation", model="distilgpt2", device=-1)
+ print("βœ… AI models loaded successfully!")
  
+ # βœ… Initialize FastAPI
+ app = FastAPI()
  
+ def generate_visualization(excel_file, viz_type, user_request):
+     """Generates Python visualization code and insights based on user requests and Excel data."""
      try:
+         print("πŸ“‚ Loading Excel file...")
+         df = pd.read_excel(excel_file)
+         print("βœ… File loaded successfully! Columns:", df.columns)
+
+         # Convert date columns
+         for col in df.select_dtypes(include=["object", "datetime64"]):
+             try:
+                 df[col] = pd.to_datetime(df[col], errors='coerce').dt.strftime('%Y-%m-%d %H:%M:%S')
+             except Exception:
+                 pass
+
+         df = df.fillna(0)  # Fill NaN values
+
+         formatted_table = [{col: str(value) for col, value in row.items()} for row in df.to_dict(orient="records")]
+         print(f"πŸ“Š Formatted table: {formatted_table[:5]}")
+         print(f"πŸ” User request: {user_request}")
+
+         if not isinstance(user_request, str):
+             raise ValueError("User request must be a string")
+
+         print("🧠 Sending data to TAPAS model for analysis...")
+         table_answer = table_analyzer({"table": formatted_table, "query": user_request})
+         print("βœ… Table analysis completed!")
+
+         # βœ… AI-generated code
+         prompt = f"""Generate clean and executable Python code to visualize the following dataset:
+         Columns: {list(df.columns)}
+         Visualization type: {viz_type}
+         User request: {user_request}
+         Use the provided DataFrame 'df' without reloading it.
+         Ensure 'plt.show()' is at the end.
+         """
+
+         print("πŸ€– Sending request to AI code generator...")
+         generated_code = code_generator(prompt, max_length=200)[0]['generated_text']
+         print("πŸ“ AI-generated code:")
+         print(generated_code)
+
+         # βœ… Validate generated code
+         valid_syntax = re.match(r".*plt\.show\(\).*", generated_code, re.DOTALL)
+         if not valid_syntax:
+             print("⚠️ AI code generation failed! Using fallback visualization...")
+             return generated_code, "Error: The AI did not generate a valid Matplotlib script."
+
+         try:
+             ast.parse(generated_code)  # Syntax validation
+         except SyntaxError as e:
+             return generated_code, f"Syntax error: {e}"
+
+         # βœ… Execute AI-generated code
+         try:
+             print("⚑ Executing AI-generated code...")
+             exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df.copy(), "io": io}
+             exec(generated_code, exec_globals)
+
+             fig = plt.gcf()
+             img_buf = io.BytesIO()
+             fig.savefig(img_buf, format='png')
+             img_buf.seek(0)
+             plt.close(fig)
+         except Exception as e:
+             print(f"❌ Error executing AI-generated code: {str(e)}")
+             return generated_code, f"Error executing visualization: {str(e)}"
+
+         img = Image.open(img_buf)
+         return generated_code, img
  
      except Exception as e:
+         print(f"❌ An error occurred: {str(e)}")
+         return f"Error: {str(e)}", "Table analysis failed."
+
+ # βœ… Gradio UI setup
+ print("πŸ› οΈ Setting up Gradio interface...")
+ gradio_ui = gr.Interface(
+     fn=generate_visualization,
+     inputs=[
+         gr.File(label="Upload Excel File"),
+         gr.Radio([
+             "Bar Chart", "Line Chart", "Scatter Plot", "Histogram",
+             "Boxplot", "Heatmap", "Pie Chart", "Area Chart", "Bubble Chart", "Violin Plot"
+         ], label="Select Visualization Type"),
+         gr.Textbox(label="Enter visualization request (e.g., 'Sales trend over time')")
+     ],
+     outputs=[
+         gr.Code(label="Generated Python Code"),
+         gr.Image(label="Visualization Result")
+     ],
+     title="AI-Powered Data Visualization πŸ“Š",
+     description="Upload an Excel file, choose your visualization type, and ask a question about your data!"
+ )
+ print("βœ… Gradio interface configured successfully!")
+
+ # βœ… Mount Gradio app
+ print("πŸ”— Mounting Gradio interface on FastAPI...")
+ app = gr.mount_gradio_app(app, gradio_ui, path="/")
+ print("βœ… Gradio interface mounted successfully!")
  
  @app.get("/")
  def home():
+     print("🏠 Redirecting to UI...")
      return RedirectResponse(url="/")
+
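
A minimal sketch of how the updated app could be served locally, assuming the file keeps the name app.py and that uvicorn is used as the ASGI server (neither is stated in this commit); gr.mount_gradio_app has already attached the Gradio UI at "/", so only the FastAPI instance needs to run:

# hypothetical launcher, e.g. run_local.py (not part of this commit)
import uvicorn            # assumed ASGI server, not pinned by this commit
from app import app       # "app" is the FastAPI instance defined in app.py above

if __name__ == "__main__":
    # 7860 is the conventional Gradio/Spaces port; any free port works
    uvicorn.run(app, host="0.0.0.0", port=7860)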