vharika31 commited on
Commit
da0b26a
Β·
verified Β·
1 Parent(s): 1c4093a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # βœ… Install dependencies
2
+
3
+ # πŸ“š Imports
4
+ import fitz # PyMuPDF
5
+ import requests
6
+ import gradio as gr
7
+ import tempfile
8
+ import os
9
+ import io
10
+
11
+ # πŸ”‘ Enter your OpenRouter API key here
12
+ OPENROUTER_API_KEY = "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"
13
+
14
+ # Global variable to store the extracted text
15
+ pdf_text = ""
16
+
17
+ # πŸ“„ Extract text from PDF
18
+ def extract_text_from_pdf(file_obj):
19
+ global pdf_text
20
+
21
+ if file_obj is None:
22
+ return "Please upload a PDF file first."
23
+
24
+ try:
25
+ # Get the file path from the file object
26
+ # In Gradio, the file object has a name attribute that contains the path
27
+ file_path = file_obj.name
28
+
29
+ # Now open the file with PyMuPDF
30
+ doc = fitz.open(file_path)
31
+ text = ""
32
+ for page in doc:
33
+ text += page.get_text()
34
+ doc.close()
35
+
36
+ # Store the text for later use
37
+ pdf_text = text
38
+
39
+ # Return preview of the extracted text
40
+ preview = text[:500] + "..." if len(text) > 500 else text
41
+ return f"βœ… PDF uploaded and processed successfully. Preview:\n\n{preview}"
42
+
43
+ except Exception as e:
44
+ return f"❌ Error processing PDF: {str(e)}"
45
+
46
+ # πŸ’¬ Ask the open-source LLM (Mistral-7B via OpenRouter)
47
+ def ask_open_source_llm(question, model_choice="nvidia/llama-3.1-nemotron-nano-8b-v1:free"):
48
+ global pdf_text
49
+
50
+ if not pdf_text:
51
+ return "⚠️ Please upload a PDF document first."
52
+
53
+ # Limit text to prevent token overflow
54
+ limited_text = pdf_text[:3000] # First 3000 characters
55
+
56
+ # Create prompt based on question
57
+ if not question:
58
+ prompt = f"Summarize the following document:\n\n{limited_text}"
59
+ else:
60
+ prompt = f"The document says:\n\n{limited_text}\n\nNow answer this: {question}"
61
+
62
+ # Call the API
63
+ url = "https://openrouter.ai/api/v1/chat/completions"
64
+ headers = {
65
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
66
+ "Content-Type": "application/json"
67
+ }
68
+
69
+ data = {
70
+ "model": model_choice,
71
+ "messages": [{"role": "user", "content": prompt}]
72
+ }
73
+
74
+ try:
75
+ response = requests.post(url, headers=headers, json=data)
76
+ if response.status_code == 200:
77
+ return response.json()["choices"][0]["message"]["content"]
78
+ else:
79
+ return f"❌ Error: {response.text}"
80
+ except Exception as e:
81
+ return f"❌ An error occurred: {str(e)}"
82
+
83
# Gradio callback: orchestrates PDF ingestion and question answering.
def process_query(pdf_file, question, model_choice):
    """Handle one Submit click: (re)ingest the uploaded PDF, then query the LLM."""
    # Re-extract text whenever a file is present; surface extraction errors
    # directly to the user instead of querying the model with stale text.
    if pdf_file is not None:
        status = extract_text_from_pdf(pdf_file)
        if status.startswith("❌ Error"):
            return status

    # With no explicit question, fall back to asking for a summary.
    query = question if question else "Please summarize this document."
    return ask_open_source_llm(query, model_choice)
96
+
97
# Create Gradio interface
# Declarative UI: the order in which components are created inside the `with`
# blocks determines the on-screen layout, so statement order here matters.
with gr.Blocks(title="PDF Document Analysis") as app:
    gr.Markdown("# 📚 PDF Document Analysis with LLM")
    gr.Markdown("Upload a PDF document and ask questions about its content.")

    with gr.Row():
        # Left (narrow) column: all user inputs.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Upload PDF Document", file_types=[".pdf"])
            # Model choices mirror the default in ask_open_source_llm.
            model_choice = gr.Dropdown(
                choices=[
                    "nvidia/llama-3.1-nemotron-nano-8b-v1:free",
                    "mistralai/mistral-7b-instruct-v0.1:free",
                    "meta-llama/llama-2-13b-chat:free"
                ],
                label="LLM Model",
                value="nvidia/llama-3.1-nemotron-nano-8b-v1:free"
            )
            question_input = gr.Textbox(label="Ask a question (or leave empty for summary)", lines=2)
            submit_btn = gr.Button("Process", variant="primary")

        # Right (wide) column: the model's response.
        with gr.Column(scale=2):
            output = gr.Textbox(label="Response", lines=15)

    # Set up event handlers
    submit_btn.click(
        fn=process_query,
        inputs=[pdf_input, question_input, model_choice],
        outputs=output
    )

    gr.Markdown("### 📝 Notes")
    gr.Markdown("- For large documents, only the first 3000 characters are analyzed")
    gr.Markdown("- You can change the LLM model from the dropdown menu")
    gr.Markdown("- Leave the question field empty to get a general summary")

# Launch the app
# NOTE(review): share=True only creates a public tunnel when running locally
# (hosted platforms typically ignore it with a warning), and debug=True keeps
# the process attached — presumably intentional for development; confirm
# before production use.
app.launch(debug=True, share=True)