|
import gradio as gr |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import FAISS |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.llms import OpenAI |
|
|
|
def process_pdfs(files):
    """Extract the text of every uploaded PDF and return it as one string.

    Args:
        files: Iterable of uploaded files. Each item is either a plain path
            string or an object exposing the on-disk path as ``.name``.
            ``None`` or an empty collection (nothing uploaded) is accepted.

    Returns:
        The page texts of each PDF joined with single newlines, with
        consecutive documents separated by a blank line. An empty string
        when no files were supplied.
    """
    # The click handler can be invoked before anything has been uploaded, in
    # which case there is nothing to iterate over.
    if not files:
        return ""

    texts = []
    for file in files:
        # Plain strings are already paths; wrapper objects carry the path in
        # ``.name``. (Do not use getattr-with-default here: pathlib-style
        # objects have a ``.name`` that is only the basename.)
        path = file if isinstance(file, str) else file.name
        pages = PyPDFLoader(path).load()
        texts.append("\n".join(page.page_content for page in pages))
    return "\n\n".join(texts)
|
|
|
def create_gradio_interface():
    """Build the Gradio UI for PDF text extraction.

    The layout consists of a heading, a file-upload component, a text box for
    the result, and a button whose click runs ``process_pdfs`` on the
    uploaded files and writes the returned text into the text box.

    Returns:
        The assembled ``gr.Blocks`` app. It is NOT launched here; the caller
        is responsible for calling ``.launch()`` on it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# PDF Text Extractor")

        # ``type`` is intentionally left at the library default: the legacy
        # value "file" is rejected by newer Gradio releases, while the default
        # of each release still yields items whose path is readable via
        # ``.name`` — NOTE(review): confirm against the pinned Gradio version.
        pdf_files = gr.Files(label="Upload PDF Documents")
        output_text = gr.Textbox(label="Extracted Text", lines=10)
        extract_button = gr.Button("Extract Text")

        extract_button.click(
            process_pdfs,
            inputs=[pdf_files],
            outputs=[output_text],
        )

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    demo = create_gradio_interface()
    demo.launch()
|
|
|
|