ikraamkb committed on
Commit
49b29c3
·
verified ·
1 Parent(s): 17a90d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -60
app.py CHANGED
@@ -1,37 +1,25 @@
1
- from fastapi import FastAPI, File, UploadFile
 
2
  import pdfplumber
3
- import pytesseract
4
- from PIL import Image
5
- import easyocr
6
  import docx
7
  import openpyxl
8
  from pptx import Presentation
9
  from transformers import pipeline
10
  import gradio as gr
11
- import pandas as pd
12
- import matplotlib.pyplot as plt
13
- import seaborn as sns
14
  from fastapi.responses import RedirectResponse
15
- import io
16
 
17
  # ✅ Initialize FastAPI
18
  app = FastAPI()
19
 
20
- # ✅ Load AI Models
21
- from transformers import pipeline
22
-
23
- qa_pipeline = pipeline("text2text-generation",model="google/flan-t5-large",tokenizer="google/flan-t5-large",use_fast=True,device=0)
24
- table_analyzer = pipeline("table-question-answering",model="google/tapas-large-finetuned-wtq",tokenizer="google/tapas-large-finetuned-wtq",use_fast=True,device=0)
25
- code_generator = pipeline("text-generation",model="openai-community/gpt2-medium",tokenizer="openai-community/gpt2-medium",use_fast=True,device=0)
26
- vqa_pipeline = pipeline("image-to-text",model="Salesforce/blip-vqa-base",device=0 )
27
 
28
-
29
- # ✅ Function to truncate text to 450 tokens
30
  def truncate_text(text, max_tokens=450):
31
  words = text.split()
32
- return " ".join(words[:max_tokens]) # βœ… Keeps only the first 450 words
33
 
34
- # ✅ Functions for Document & Image QA
35
  def extract_text_from_pdf(pdf_file):
36
  text = ""
37
  with pdfplumber.open(pdf_file) as pdf:
@@ -60,11 +48,7 @@ def extract_text_from_excel(excel_file):
60
  text.append(" ".join(map(str, row)))
61
  return "\n".join(text)
62
 
63
- def extract_text_from_image(image_file):
64
- reader = easyocr.Reader(["en"])
65
- result = reader.readtext(image_file)
66
- return " ".join([res[1] for res in result])
67
-
68
  def answer_question_from_document(file, question):
69
  file_ext = file.name.split(".")[-1].lower()
70
 
@@ -83,25 +67,11 @@ def answer_question_from_document(file, question):
83
  return "No text extracted from the document."
84
 
85
  truncated_text = truncate_text(text) # βœ… Prevents token limit error
86
-
87
- input_text = f"Question: {question} Context: {truncated_text}" # βœ… Proper FLAN-T5 format
88
- response = qa_pipeline(input_text)
89
-
90
- return response[0]["generated_text"] # βœ… Returns the correct output
91
-
92
- def answer_question_from_image(image, question):
93
- image_text = extract_text_from_image(image)
94
- if not image_text:
95
- return "No text detected in the image."
96
-
97
- truncated_text = truncate_text(image_text) # βœ… Prevents token limit error
98
-
99
  input_text = f"Question: {question} Context: {truncated_text}"
100
  response = qa_pipeline(input_text)
101
-
102
  return response[0]["generated_text"]
103
 
104
- # ✅ Gradio UI for Document & Image QA
105
  doc_interface = gr.Interface(
106
  fn=answer_question_from_document,
107
  inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
@@ -109,28 +79,8 @@ doc_interface = gr.Interface(
109
  title="AI Document Question Answering"
110
  )
111
 
112
- img_interface = gr.Interface(
113
- fn=answer_question_from_image,
114
- inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")],
115
- outputs="text",
116
- title="AI Image Question Answering"
117
- )
118
-
119
-
120
- # ✅ Gradio UI for Data Visualization
121
- viz_interface = gr.Interface(
122
- fn=generate_visualization,
123
- inputs=[
124
- gr.File(label="Upload Excel File"),
125
- gr.Radio(["Bar Chart", "Line Chart", "Scatter Plot", "Histogram"], label="Choose Visualization Type"),
126
- gr.Textbox(label="Enter Visualization Request")
127
- ],
128
- outputs=[gr.Code(label="Generated Python Code"), gr.Image(label="Visualization Output")],
129
- title="AI-Powered Data Visualization"
130
- )
131
-
132
  # ✅ Mount Gradio Interfaces
133
- demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
134
  app = gr.mount_gradio_app(app, demo, path="/")
135
 
136
  @app.get("/")
 
1
+
2
+ from fastapi import FastAPI, UploadFile
3
  import pdfplumber
 
 
 
4
  import docx
5
  import openpyxl
6
  from pptx import Presentation
7
  from transformers import pipeline
8
  import gradio as gr
 
 
 
9
  from fastapi.responses import RedirectResponse
 
10
 
11
  # ✅ Initialize FastAPI
12
  app = FastAPI()
13
 
14
+ # ✅ Load AI Model
15
+ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large", tokenizer="google/flan-t5-large", use_fast=True, device=0)
 
 
 
 
 
16
 
17
+ # ✅ Function to truncate text to avoid token limit errors
 
18
  def truncate_text(text, max_tokens=450):
19
  words = text.split()
20
+ return " ".join(words[:max_tokens])
21
 
22
+ # ✅ Functions to extract text from different file formats
23
  def extract_text_from_pdf(pdf_file):
24
  text = ""
25
  with pdfplumber.open(pdf_file) as pdf:
 
48
  text.append(" ".join(map(str, row)))
49
  return "\n".join(text)
50
 
51
+ # ✅ Function to answer questions based on document content
 
 
 
 
52
  def answer_question_from_document(file, question):
53
  file_ext = file.name.split(".")[-1].lower()
54
 
 
67
  return "No text extracted from the document."
68
 
69
  truncated_text = truncate_text(text) # βœ… Prevents token limit error
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  input_text = f"Question: {question} Context: {truncated_text}"
71
  response = qa_pipeline(input_text)
 
72
  return response[0]["generated_text"]
73
 
74
+ # ✅ Gradio UI for Document QA
75
  doc_interface = gr.Interface(
76
  fn=answer_question_from_document,
77
  inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
 
79
  title="AI Document Question Answering"
80
  )
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # ✅ Mount Gradio Interfaces
83
+ demo = gr.TabbedInterface([doc_interface], ["Document QA"])
84
  app = gr.mount_gradio_app(app, demo, path="/")
85
 
86
  @app.get("/")