shukdevdatta123 committed on
Commit
8267fce
·
verified ·
1 Parent(s): 83428a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -614
app.py CHANGED
@@ -3,718 +3,225 @@ import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import os
5
  import tempfile
6
- import time
7
- import logging
8
- import re
9
- from typing import List, Optional, Dict, Any, Union
10
- import concurrent.futures
11
-
12
- # Set up logging
13
- logging.basicConfig(
14
- level=logging.INFO,
15
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
16
- )
17
- logger = logging.getLogger(__name__)
18
 
19
  # Variable to store API key
20
  api_key = ""
21
 
22
  # Function to update API key
23
- def set_api_key(key: str) -> str:
24
- """Set the OpenAI API key."""
25
  global api_key
26
- if not key.strip():
27
- return "Please enter a valid API key"
28
-
29
- api_key = key.strip()
30
- return "✅ API Key Set Successfully!"
31
 
32
  # Function to extract text from PDF
33
- def extract_text_from_pdf(pdf_path: str) -> str:
34
- """Extract text content from a PDF file."""
35
  try:
36
  doc = fitz.open(pdf_path)
37
- text = ""
38
- for page_num, page in enumerate(doc):
39
- text += f"\n--- Page {page_num + 1} ---\n"
40
- text += page.get_text("text")
41
  return text
42
  except Exception as e:
43
- logger.error(f"Error extracting text from PDF: {str(e)}")
44
  return f"Error extracting text from PDF: {str(e)}"
45
 
46
- # Function to truncate text to fit token limits
47
- def truncate_text_for_tokens(text: str, max_tokens: int = 8000) -> str:
48
- """Truncate text to approximately fit within token limits."""
49
- # Rough approximation: 1 token ≈ 4 characters in English
50
- char_limit = max_tokens * 4
51
- if len(text) > char_limit:
52
- return text[:char_limit] + "\n[Content truncated due to length...]"
53
- return text
54
-
55
- # Function to extract title from PDF content
56
- def extract_title(pdf_text: str) -> str:
57
- """Attempt to extract a title from PDF text."""
58
- # Look for title in first few lines
59
- first_lines = pdf_text.split('\n')[:10]
60
- for line in first_lines:
61
- line = line.strip()
62
- # Title candidates: all caps, longer than 5 chars, shorter than 200
63
- if len(line) > 5 and len(line) < 200 and not line.startswith('---'):
64
- return line
65
-
66
- return "Untitled Document"
67
-
68
- # Model selection options
69
- MODEL_OPTIONS = {
70
- "gpt-4.1": "GPT-4 (Most powerful, slower)",
71
- "gpt-3.5-turbo": "GPT-3.5 Turbo (Faster, less powerful)"
72
- }
73
-
74
- # Function to get available OpenAI models
75
- def get_available_models() -> List[str]:
76
- """Get list of available OpenAI models."""
77
  if not api_key:
78
- return list(MODEL_OPTIONS.keys())
79
 
80
- try:
81
- openai.api_key = api_key
82
- response = openai.Model.list()
83
- models = [model.id for model in response['data'] if 'gpt' in model.id.lower()]
84
- # Add to our options if found
85
- for model in models:
86
- if model not in MODEL_OPTIONS and ('gpt-4.1' in model or 'gpt-3.5-turbo' in model):
87
- MODEL_OPTIONS[model] = model
88
- return list(MODEL_OPTIONS.keys())
89
- except Exception as e:
90
- logger.error(f"Error fetching models: {str(e)}")
91
- return list(MODEL_OPTIONS.keys())
92
-
93
- # Function for parallel PDF processing
94
- def process_pdf_in_parallel(pdf_files: List[str]) -> List[tuple]:
95
- """Process multiple PDFs in parallel to extract text."""
96
- results = []
97
 
98
- with concurrent.futures.ThreadPoolExecutor() as executor:
99
- future_to_pdf = {executor.submit(extract_text_from_pdf, pdf_path): pdf_path for pdf_path in pdf_files}
100
- for future in concurrent.futures.as_completed(future_to_pdf):
101
- pdf_path = future_to_pdf[future]
102
- pdf_name = os.path.basename(pdf_path)
103
- try:
104
- pdf_text = future.result()
105
- # Truncate if needed
106
- pdf_text = truncate_text_for_tokens(pdf_text)
107
- results.append((pdf_name, pdf_text))
108
- except Exception as e:
109
- logger.error(f"Error processing {pdf_name}: {str(e)}")
110
- results.append((pdf_name, f"Error processing file: {str(e)}"))
111
 
112
- return results
113
-
114
- # Function to create system prompt
115
- def create_system_prompt(review_type: str = "systematic") -> str:
116
- """Create system prompt based on review type."""
117
- if review_type == "systematic":
118
- return """
119
- You are an expert academic researcher tasked with creating comprehensive systematic reviews. Follow these steps:
120
 
 
 
121
  Step 1: Identify a Research Field
122
- Identify the specific area of study represented in the provided papers.
123
 
124
  Step 2: Generate a Research Question
125
- Create a specific, measurable, achievable, relevant, and time-bound (SMART) research question that unifies the papers.
126
 
127
  Step 3: Create a Protocol
128
- Outline a detailed methodology for your review, including analysis methods appropriate for the papers.
129
 
130
  Step 4: Evaluate Relevant Literature
131
- Critically evaluate the quality, methodology, and findings of the provided papers, identifying gaps or limitations.
132
 
133
  Step 5: Investigate Sources for Answers
134
- Examine how the papers contribute to answering the research question.
135
 
136
  Step 6: Collect Data as per Protocol
137
- Implement rigorous data collection methods, extracting key findings and statistics.
138
 
139
  Step 7: Data Extraction
140
- Organize the extracted data in a structured format, including tables where appropriate.
141
 
142
  Step 8: Critical Analysis of Results
143
- Interpret patterns, trends, and conclusions from the data, comparing findings across papers.
144
 
145
  Step 9: Interpreting Derivations
146
- Contextualize the findings in relation to the research question and broader field.
147
 
148
  Step 10: Concluding Statements
149
- Summarize findings, draw conclusions, and provide recommendations for future research.
150
-
151
- Step 11: References
152
- Include proper citations for all papers reviewed and any additional references.
153
-
154
- Your review should be:
155
- - Comprehensive yet concise
156
- - Well-structured with clear headings and subheadings
157
- - Using academic language appropriate for a scholarly audience
158
- - Including data visualizations or tables where helpful
159
- - Balanced and objective in evaluating the evidence
160
- """
161
- elif review_type == "literature":
162
- return """
163
- You are an expert academic researcher tasked with creating a thorough literature review. Your review should:
164
-
165
- 1. Provide an overview of the current state of knowledge in the specific field
166
- 2. Identify common themes, methodologies, and findings across the papers
167
- 3. Highlight contradictions or inconsistencies in the literature
168
- 4. Evaluate the strength of evidence for key claims
169
- 5. Identify research gaps and future directions
170
- 6. Organize findings in a logical, thematic structure
171
- 7. Include visual elements (tables, concept maps) to synthesize information
172
- 8. Maintain academic rigor and proper attribution
173
 
174
- Your review should be scholarly in tone, well-organized, and provide a balanced assessment of the literature.
 
175
  """
176
- else: # meta-analysis
177
- return """
178
- You are an expert researcher conducting a meta-analysis of the provided papers. Your analysis should:
179
-
180
- 1. Identify a precise research question that can be answered quantitatively
181
- 2. Extract comparable quantitative data, effect sizes, or statistics from the papers
182
- 3. Assess the methodological quality and risk of bias in each study
183
- 4. Synthesize findings using appropriate statistical methods
184
- 5. Present results using forest plots, funnel plots, or other visualizations
185
- 6. Discuss heterogeneity and its potential sources
186
- 7. Evaluate publication bias and its impact on the findings
187
- 8. Draw conclusions based on the pooled data
188
- 9. Discuss implications for practice and future research
189
-
190
- Your meta-analysis should follow PRISMA guidelines where applicable, maintain statistical rigor, and provide clear visual representations of the quantitative synthesis.
191
- """
192
-
193
- # Function to interact with OpenAI API for systematic review
194
- def generate_systematic_review(
195
- pdf_files: List[str],
196
- review_question: str,
197
- model: str = "gpt-4.1",
198
- review_type: str = "systematic",
199
- include_tables: bool = True,
200
- temperature: float = 0.7,
201
- max_tokens: int = 4000
202
- ) -> str:
203
- """Generate a systematic review of the provided PDF files."""
204
- if not api_key:
205
- return "Please enter your OpenAI API key first."
206
-
207
- if not pdf_files:
208
- return "Please upload at least one PDF file."
209
-
210
- if not review_question:
211
- return "Please enter a review question."
212
-
213
- try:
214
- # Start timer
215
- start_time = time.time()
216
-
217
- openai.api_key = api_key
218
 
219
- # Create the system message with review guidelines
220
- system_prompt = create_system_prompt(review_type)
 
221
 
222
- # Process PDFs in parallel
223
- logger.info(f"Processing {len(pdf_files)} PDFs...")
224
- pdf_results = process_pdf_in_parallel(pdf_files)
225
-
226
- # Extract titles for reference
227
- titles = [extract_title(pdf_text) for _, pdf_text in pdf_results]
228
- pdf_names = [name for name, _ in pdf_results]
 
 
 
 
229
 
230
  # Prepare the user prompt with the review question and instructions
231
  table_instruction = ""
232
  if include_tables:
233
- table_instruction = " Please include important tables, charts or figures in your review to help summarize the findings."
234
 
235
- user_prompt = f"""
236
- Please generate a {review_type} review of the following {len(pdf_files)} papers:
237
- {', '.join([f"{i+1}. {pdf_names[i]} (Title: {titles[i]})" for i in range(len(pdf_names))])}
238
-
239
- Review Question: {review_question}
240
-
241
- {table_instruction}
242
-
243
- Format your response with clear headings, subheadings, and properly formatted tables using markdown syntax.
244
- """
245
-
246
- # Combine PDF texts, with truncation if needed
247
- combined_pdf_text = ""
248
- total_chars = 0
249
- max_chars = 20000 # Rough approximation to fit within token limits
250
-
251
- for i, (pdf_name, pdf_text) in enumerate(pdf_results):
252
- header = f"\n\n--- PAPER {i+1}: {pdf_name} ---\n\n"
253
- if total_chars + len(header) + len(pdf_text) > max_chars:
254
- # Truncate this paper's text
255
- remaining = max_chars - total_chars - len(header)
256
- if remaining > 500: # Only add if we can include meaningful content
257
- truncated_text = pdf_text[:remaining] + "\n[... Content truncated due to length limitations ...]"
258
- combined_pdf_text += header + truncated_text
259
- total_chars += len(header) + len(truncated_text)
260
- break
261
- else:
262
- combined_pdf_text += header + pdf_text
263
- total_chars += len(header) + len(pdf_text)
264
 
265
  # Create the messages for the API call
266
  messages = [
267
  {"role": "system", "content": system_prompt},
268
- {"role": "user", "content": user_prompt + combined_pdf_text}
269
  ]
270
 
271
- logger.info(f"Sending request to OpenAI API (model: {model})...")
272
-
273
- # Call the API
274
  response = openai.ChatCompletion.create(
275
- model=model,
276
  messages=messages,
277
- temperature=temperature,
278
- max_tokens=max_tokens
 
279
  )
280
 
281
- result = response["choices"][0]["message"]["content"]
282
-
283
- # Convert markdown to HTML for tables
284
- result_html = markdown.markdown(result, extensions=['tables'])
285
-
286
- # Calculate time taken
287
- time_taken = time.time() - start_time
288
- logger.info(f"Review generated in {time_taken:.2f} seconds")
289
-
290
- return result
291
 
292
  except Exception as e:
293
- logger.error(f"Error generating review: {str(e)}")
294
  return f"Error generating systematic review: {str(e)}"
295
 
296
  # Function to save uploaded files
297
- def save_uploaded_files(files) -> List[str]:
298
- """Save uploaded files to temporary directory and return their paths."""
299
  if not files:
300
  return []
301
 
302
  saved_paths = []
303
  for file in files:
304
  if file is not None:
305
- # Extract file extension
306
- file_extension = os.path.splitext(file.name)[1].lower()
307
-
308
- # Only process PDF files
309
- if file_extension != '.pdf':
310
- continue
311
-
312
  # Create a temporary file
313
- with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
314
- # If file is a file object, write its content
315
- if hasattr(file, 'read'):
316
- tmp_file.write(file.read())
317
- # If file is already a path
318
- else:
319
- with open(file, 'rb') as f:
320
- tmp_file.write(f.read())
321
-
322
  saved_paths.append(tmp_file.name)
323
 
324
  return saved_paths
325
 
326
- # Custom HTML and CSS for better UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  css = """
328
  <style>
329
- /* Base styling */
330
- body {
331
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
332
- }
333
-
334
- .container {
335
- max-width: 1200px !important;
336
- margin: 0 auto;
337
- }
338
-
339
- /* Header styling */
340
- .header {
341
- background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
342
- color: white;
343
- padding: 20px;
344
- border-radius: 10px;
345
- margin-bottom: 20px;
346
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
347
- }
348
-
349
- /* Button styling */
350
  #generate_button {
351
  background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
352
  color: white;
353
  font-weight: bold;
354
- padding: 10px 20px;
355
- border-radius: 8px;
356
- border: none;
357
- cursor: pointer;
358
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
359
- transition: all 0.3s ease;
360
  }
361
-
362
  #generate_button:hover {
363
  background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
364
- transform: translateY(-2px);
365
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
366
  }
367
-
368
  #api_key_button {
369
  background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
370
  color: white;
371
  font-weight: bold;
372
  margin-top: 27px;
373
- padding: 10px 20px;
374
- border-radius: 8px;
375
- border: none;
376
- cursor: pointer;
377
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
378
- transition: all 0.3s ease;
379
  }
380
-
381
  #api_key_button:hover {
382
  background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
383
- transform: translateY(-2px);
384
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
385
- }
386
-
387
- /* Card styling */
388
- .card {
389
- background-color: white;
390
- border-radius: 10px;
391
- padding: 20px;
392
- margin-bottom: 20px;
393
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
394
- }
395
-
396
- /* Form styling */
397
- .form-group {
398
- margin-bottom: 15px;
399
  }
400
-
401
- /* Tabs styling */
402
- .tab-content {
403
- padding: 20px;
404
- background-color: white;
405
- border-radius: 0 0 10px 10px;
406
- }
407
-
408
- /* Table styling in output */
409
- .output-container table {
410
- border-collapse: collapse;
411
- width: 100%;
412
- margin: 20px 0;
413
- }
414
-
415
- .output-container th, .output-container td {
416
- border: 1px solid #ddd;
417
- padding: 8px;
418
- text-align: left;
419
- }
420
-
421
- .output-container th {
422
- background-color: #f2f2f2;
423
- font-weight: bold;
424
- }
425
-
426
- .output-container tr:nth-child(even) {
427
- background-color: #f9f9f9;
428
- }
429
-
430
- /* Spinner styling */
431
- .loading-spinner {
432
- display: inline-block;
433
- width: 20px;
434
- height: 20px;
435
- border: 3px solid rgba(0, 0, 0, 0.1);
436
- border-radius: 50%;
437
- border-top-color: #4a00e0;
438
- animation: spin 1s ease-in-out infinite;
439
- }
440
-
441
- @keyframes spin {
442
- to {
443
- transform: rotate(360deg);
444
- }
445
- }
446
-
447
- /* Customizations for Gradio */
448
  .gradio-container {
449
  max-width: 1200px !important;
450
  }
451
-
452
- .gr-form, .gr-box {
453
- border-radius: 10px !important;
454
- }
455
-
456
- .gr-input, .gr-textarea {
457
- border-radius: 6px !important;
458
- }
459
-
460
- /* Responsive adjustments */
461
- @media (max-width: 768px) {
462
- .header {
463
- padding: 15px;
464
- }
465
-
466
- #generate_button, #api_key_button {
467
- padding: 8px 16px;
468
- }
469
- }
470
  </style>
471
  """
472
 
473
- # Add custom HTML header
474
- header_html = """
475
- <div class="header">
476
- <h1>Systematic Review Generator for Research Papers</h1>
477
- <p>Upload multiple PDF papers to generate comprehensive reviews, literature analyses, and meta-analyses</p>
478
- </div>
479
- """
480
-
481
- # Custom progress component
482
- def progress_component(text, progress):
483
- return f"""
484
- <div style="margin: 10px 0; width: 100%;">
485
- <div style="display: flex; align-items: center; margin-bottom: 5px;">
486
- <div>{text}</div>
487
- <div style="margin-left: auto;">{progress}%</div>
488
- </div>
489
- <div style="background-color: #e0e0e0; height: 8px; border-radius: 4px; width: 100%;">
490
- <div style="background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); height: 100%; width: {progress}%; border-radius: 4px;"></div>
491
- </div>
492
- </div>
493
- """
494
-
495
- # Function to create a review
496
- def create_review(files, question, model, review_type, include_tables, temperature, max_tokens, progress=gr.Progress()):
497
- try:
498
- if not files:
499
- return "Please upload at least one PDF file."
500
-
501
- progress(0.1, desc="Saving uploaded files...")
502
- saved_paths = save_uploaded_files(files)
503
-
504
- if not saved_paths:
505
- return "No valid PDF files were uploaded. Please upload PDF files only."
506
-
507
- progress(0.3, desc="Processing PDFs...")
508
- review = generate_systematic_review(
509
- saved_paths,
510
- question,
511
- model=model,
512
- review_type=review_type,
513
- include_tables=include_tables,
514
- temperature=temperature,
515
- max_tokens=max_tokens
516
- )
517
-
518
- progress(0.9, desc="Finalizing review...")
519
-
520
- # Clean up temporary files
521
- for path in saved_paths:
522
- try:
523
- os.remove(path)
524
- except Exception as e:
525
- logger.error(f"Error removing temporary file {path}: {str(e)}")
526
-
527
- progress(1.0, desc="Complete!")
528
- return review
529
-
530
- except Exception as e:
531
- logger.error(f"Error in create_review: {str(e)}")
532
- return f"An error occurred: {str(e)}"
533
-
534
- # Gradio UI Layout
535
- def create_ui():
536
- with gr.Blocks(css=css) as demo:
537
- gr.HTML(header_html)
538
-
539
- with gr.Tabs() as tabs:
540
- with gr.TabItem("Generate Review"):
541
- with gr.Row():
542
- with gr.Column(scale=1):
543
- with gr.Box():
544
- gr.Markdown("### 1. Setup API Key")
545
- api_key_input = gr.Textbox(
546
- label="Enter OpenAI API Key",
547
- type="password",
548
- placeholder="sk-..."
549
- )
550
- api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
551
- api_key_output = gr.Textbox(
552
- label="API Key Status",
553
- interactive=False,
554
- value="Not set"
555
- )
556
-
557
- with gr.Box():
558
- gr.Markdown("### 2. Upload Papers")
559
- pdf_files = gr.File(
560
- label="Upload PDF Research Papers (PDF files only)",
561
- file_count="multiple",
562
- type="binary",
563
- file_types=[".pdf"]
564
- )
565
-
566
- with gr.Column(scale=1):
567
- with gr.Box():
568
- gr.Markdown("### 3. Review Configuration")
569
- review_question = gr.Textbox(
570
- label="Review Question or Topic",
571
- placeholder="What are the current advances in GAN applications for speech processing?",
572
- lines=2
573
- )
574
-
575
- review_type = gr.Radio(
576
- label="Review Type",
577
- choices=["systematic", "literature", "meta-analysis"],
578
- value="systematic"
579
- )
580
-
581
- model = gr.Dropdown(
582
- label="Model",
583
- choices=list(MODEL_OPTIONS.keys()),
584
- value="gpt-4.1"
585
- )
586
-
587
- with gr.Row():
588
- include_tables = gr.Checkbox(
589
- label="Include Tables and Figures",
590
- value=True
591
- )
592
-
593
- with gr.Column():
594
- temperature = gr.Slider(
595
- label="Temperature (Creativity)",
596
- minimum=0.0,
597
- maximum=1.0,
598
- value=0.7,
599
- step=0.1
600
- )
601
-
602
- max_tokens = gr.Slider(
603
- label="Maximum Output Length",
604
- minimum=1000,
605
- maximum=8000,
606
- value=4000,
607
- step=500
608
- )
609
-
610
- generate_button = gr.Button(
611
- "Generate Review",
612
- elem_id="generate_button",
613
- variant="primary"
614
- )
615
-
616
- # Output
617
- with gr.Box():
618
- gr.Markdown("### Review Output")
619
- review_output = gr.Markdown(
620
- label="Generated Review",
621
- value="Review will appear here after generation..."
622
- )
623
-
624
- with gr.Row():
625
- copy_button = gr.Button("📋 Copy to Clipboard")
626
- export_button = gr.Button("📥 Export as Markdown")
627
-
628
- with gr.TabItem("How to Use"):
629
- gr.Markdown("""
630
- ### Getting Started with the Systematic Review Generator
631
-
632
- #### 1. Setting Up
633
- - Enter your OpenAI API key in the field provided and click "Set API Key"
634
- - You'll need an API key with access to GPT-4 or GPT-3.5 for best results
635
- - Your API key is never stored and is only used for this session
636
-
637
- #### 2. Uploading Papers
638
- - Upload 2 or more PDF research papers (the more related they are, the better)
639
- - Only PDF files are supported
640
- - Papers should ideally be related to the same research field
641
-
642
- #### 3. Configuring Your Review
643
- - Enter a specific review question or topic
644
- - Choose the review type:
645
- - **Systematic Review**: Follows a rigorous methodology to answer a specific research question
646
- - **Literature Review**: Provides an overview of existing research on a topic
647
- - **Meta-Analysis**: Combines and analyzes quantitative data from multiple studies
648
- - Select the AI model (GPT-4 recommended for complex papers)
649
- - Adjust temperature (higher = more creative, lower = more focused)
650
- - Set maximum output length (longer reviews will be more comprehensive)
651
-
652
- #### 4. Generating Your Review
653
- - Click "Generate Review" to start the process
654
- - Processing time depends on the number and size of papers, and the selected model
655
- - You can copy or export the final review when complete
656
-
657
- #### Tips for Best Results
658
- - Use papers from the same field or on related topics
659
- - Be specific in your review question
660
- - For technical papers, choose GPT-4 for better comprehension
661
- - The system works best with 2-5 related papers
662
- - Consider using a lower temperature (0.3-0.5) for more factual reviews
663
- """)
664
-
665
- with gr.TabItem("About"):
666
- gr.Markdown("""
667
- ### About the Systematic Review Generator
668
-
669
- This application helps researchers, students, and academics generate comprehensive reviews of scientific papers. It leverages advanced AI to analyze PDF research papers and synthesize findings into structured, coherent reviews.
670
-
671
- #### Features
672
- - Support for multiple review types: systematic reviews, literature reviews, and meta-analyses
673
- - Automatic extraction of text from PDF files
674
- - Parallel processing of multiple papers
675
- - Integration with OpenAI's GPT models
676
- - Customizable output parameters
677
- - Table and figure generation capabilities
678
-
679
- #### How It Works
680
- 1. The system extracts text from your uploaded PDFs
681
- 2. It identifies the main topics, methodologies, and findings
682
- 3. Based on your review question, it synthesizes information across papers
683
- 4. It structures the information following academic review standards
684
- 5. It provides a comprehensive review with proper sections and references
685
-
686
- #### Limitations
687
- - The quality of the review depends on the clarity of the PDFs and their text extraction
688
- - Complex scientific notation, tables, or images in PDFs may not be perfectly interpreted
689
- - The system provides a starting point, not a final paper - always review and verify the output
690
- - Token limits may prevent full analysis of very long or numerous papers
691
-
692
- #### Privacy & Security
693
- - Your API key is never stored and is only used for the current session
694
- - Uploaded PDFs are processed temporarily and deleted after review generation
695
- - No data is retained after you close the application
696
- """)
697
-
698
- # Button actions
699
- api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
700
-
701
- generate_button.click(
702
- create_review,
703
- inputs=[pdf_files, review_question, model, review_type, include_tables, temperature, max_tokens],
704
- outputs=[review_output]
705
- )
706
-
707
- # Function to refresh model list
708
- def refresh_models():
709
- return gr.Dropdown.update(choices=get_available_models())
710
-
711
- api_key_button.click(refresh_models, outputs=[model])
712
-
713
- # Copy function is handled client-side via JavaScript
714
-
715
- return demo
716
-
717
  # Launch the app
718
  if __name__ == "__main__":
719
- demo = create_ui()
720
  demo.launch(share=True)
 
3
  import fitz # PyMuPDF for PDF processing
4
  import os
5
  import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Variable to store API key
8
  api_key = ""
9
 
10
  # Function to update API key
11
def set_api_key(key):
    """Store the OpenAI API key used by later requests.

    The key is kept in the module-level ``api_key`` variable. Surrounding
    whitespace (common when a key is pasted into the UI) is removed before
    storing. A missing or blank key is rejected and leaves any previously
    stored key untouched.

    Args:
        key: API key text entered by the user; may be ``None`` or blank.

    Returns:
        A status message for the UI: a success message when the key was
        stored, otherwise a prompt to enter a valid key.
    """
    global api_key
    cleaned = (key or "").strip()
    if not cleaned:
        # Reporting success for an empty key would only defer the failure to
        # the first API call, so refuse it up front.
        return "Please enter a valid API key"
    api_key = cleaned
    return "API Key Set Successfully!"
 
 
 
15
 
16
  # Function to extract text from PDF
17
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        The concatenated page text. If the file cannot be opened or read,
        a string starting with "Error extracting text from PDF:" is returned
        instead of an exception being raised.
    """
    try:
        # The context manager closes the document even when reading a page
        # fails; without it the file handle stays open until the object is
        # garbage collected.
        with fitz.open(pdf_path) as doc:
            return "\n".join(page.get_text("text") for page in doc)
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
24
 
25
+ # Function to interact with OpenAI API for systematic review
26
# System message sent with every request: the eleven-step outline the model
# is asked to follow when writing the review.
_SYSTEMATIC_REVIEW_PROMPT = """
Step 1: Identify a Research Field
The first step in writing a systematic review paper is to identify a research field. This involves selecting a specific area of study that you are interested in and want to explore further.

Step 2: Generate a Research Question
Once you have identified your research field, the next step is to generate a research question. This question should be specific, measurable, achievable, relevant, and time-bound (SMART).

Step 3: Create a Protocol
After generating your research question, the next step is to create a protocol. A detailed plan of how you will conduct your research, including the methods you will use, the data you will collect, and the analysis you will perform.

Step 4: Evaluate Relevant Literature
The fourth step is to evaluate relevant literature. This involves searching for and reviewing existing studies related to your research question. You should critically evaluate the quality of these studies and identify any gaps or limitations in the current literature.

Step 5: Investigate Sources for Answers
The fifth step is to investigate sources for answers. This involves searching for and accessing relevant data and information that will help you answer your research question.

Step 6: Collect Data as per Protocol
The sixth step is to collect data as per protocol. This involves implementing the methods outlined in your protocol and collecting the data specified. You should ensure that your data collection methods are rigorous and reliable.

Step 7: Data Extraction
The seventh step is to extract the data. This involves organizing and analyzing the data you have collected, and extracting the relevant information that will help you answer your research question.

Step 8: Critical Analysis of Results
The eighth step is to conduct a critical analysis of your results. This involves interpreting your findings, identifying patterns and trends, and drawing conclusions based on your data.

Step 9: Interpreting Derivations
The ninth step is to interpret the derivations. This involves taking the conclusions you have drawn from your data and interpreting them in the context of your research question.

Step 10: Concluding Statements
The final step is to make concluding statements. This involves summarizing your findings and drawing conclusions based on your research. You should also provide recommendations for future research and implications for practice.

Step-11:
Please include references in the form of citation and also link to the reference papers.
"""

# Prefix of the string extract_text_from_pdf() returns when a PDF cannot be
# read; used to tell a failed extraction apart from real paper text.
_EXTRACTION_ERROR_PREFIX = "Error extracting text from PDF:"


def generate_systematic_review(pdf_files, review_question, include_tables=True):
    """Generate a systematic review of the given PDFs with the OpenAI chat API.

    Args:
        pdf_files: Sequence of PDF paths (``str``) or objects exposing the
            path as ``.name``.
        review_question: Question or topic the review should address.
        include_tables: When true, the prompt asks the model to add tables.

    Returns:
        The review text produced by the model. On any problem a
        human-readable message is returned instead of raising: a prompt for
        the missing API key, files or question; or a string starting with
        "Error generating systematic review:" when text extraction or the
        API request fails.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    if not pdf_files:
        return "Please upload at least one PDF file."

    if not review_question:
        return "Please enter a review question."

    try:
        openai.api_key = api_key

        # Extract text from each PDF
        pdf_names = []
        pdf_texts = []
        for pdf_file in pdf_files:
            # Entries are either plain paths or file objects carrying .name
            pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
            pdf_name = os.path.basename(pdf_path)
            pdf_text = extract_text_from_pdf(pdf_path)

            if pdf_text.startswith(_EXTRACTION_ERROR_PREFIX):
                # Stop here rather than sending an error message to the model
                # as if it were the paper's content.
                return f"Error generating systematic review: {pdf_name}: {pdf_text}"

            pdf_names.append(pdf_name)
            pdf_texts.append(pdf_text)

        # Prepare the user prompt with the review question and instructions
        table_instruction = ""
        if include_tables:
            table_instruction = " Please include important new generated tables in your review."

        user_prompt = (
            f"Please generate a systematic review of the following {len(pdf_files)} papers: "
            f"{', '.join(pdf_names)}.{table_instruction}\n\nReview Question: {review_question}"
        )

        # One labelled section per paper, numbered from 1
        paper_sections = "\n\n".join(
            f"Paper {number} - {name}:\n{text}"
            for number, (name, text) in enumerate(zip(pdf_names, pdf_texts), start=1)
        )

        # Create the messages for the API call
        messages = [
            {"role": "system", "content": _SYSTEMATIC_REVIEW_PROMPT},
            {"role": "user", "content": user_prompt + "\n\n" + paper_sections},
        ]

        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # confirm the deployed openai package version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4.1",
            messages=messages,
            temperature=1,
            top_p=1,
            max_tokens=2048,
        )

        return response["choices"][0]["message"]["content"]

    except Exception as e:
        return f"Error generating systematic review: {str(e)}"
117
 
118
  # Function to save uploaded files
119
def save_uploaded_files(files):
    """Write each uploaded PDF to its own temporary ``.pdf`` file.

    Args:
        files: Iterable of uploads, or ``None``. Each entry may be raw
            ``bytes`` (what ``gr.File(type="binary")`` supplies), a file-like
            object with ``read()``, or a filesystem path. ``None`` entries
            are skipped.

    Returns:
        Paths of the temporary files, in upload order. The files are created
        with ``delete=False``, so the caller is responsible for removing
        them. An empty list is returned when nothing was uploaded.
    """
    if not files:
        return []

    saved_paths = []
    for file in files:
        if file is None:
            continue

        # Normalise the upload to bytes, whichever form it arrived in
        if hasattr(file, "read"):
            data = file.read()
        elif isinstance(file, (str, os.PathLike)):
            with open(file, "rb") as source:
                data = source.read()
        else:
            data = file

        # Create a temporary file that outlives this function
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(data)
        saved_paths.append(tmp_file.name)

    return saved_paths
132
 
133
+ # Gradio UI Layout
134
# Custom CSS for the two action buttons and the page width. It is defined
# before the UI so it can be passed to gr.Blocks(); it holds plain CSS rules
# (no <style> wrapper) because the `css` argument expects a CSS code string.
css = """
#generate_button {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
    color: white;
    font-weight: bold;
}
#generate_button:hover {
    background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
}
#api_key_button {
    background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
    color: white;
    font-weight: bold;
    margin-top: 27px;
}
#api_key_button:hover {
    background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
}
.gradio-container {
    max-width: 1200px !important;
}
"""

# Help text shown in the "How to Use This App" accordion.
_HOW_TO_USE_MARKDOWN = """
### Getting Started:
1. Enter your OpenAI API key in the field below and click "Set API Key"
2. Upload multiple PDF research papers (2 or more recommended)
3. Enter your review question or topic
4. Check the "Include Comparison Tables" option if you want the review to include comparison tables
5. Click "Generate Systematic Review" to start the process

### Tips:
- For best results, upload papers that are related to the same research topic or field
- Be specific in your review question to get more focused results
- The generated review will follow a systematic structure including research field identification, data extraction, analysis, and conclusions
- The more papers you upload, the more comprehensive the review will be
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# Systematic Review Generator for Research Papers")

    with gr.Accordion("How to Use This App", open=True):
        gr.Markdown(_HOW_TO_USE_MARKDOWN)

    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # PDF Upload and Review Settings
    with gr.Row():
        with gr.Column():
            pdf_files = gr.File(label="Upload PDF Research Papers", file_count="multiple", type="binary")
            review_question = gr.Textbox(
                label="Review Question or Topic",
                placeholder="What are the current advances in GAN applications for speech processing?",
            )
            include_tables = gr.Checkbox(label="Include Comparison Tables", value=True)
            generate_button = gr.Button("Generate Systematic Review", elem_id="generate_button")

    # Output
    review_output = gr.Textbox(label="Systematic Review", interactive=False, lines=20)

    # Button actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])

    def process_files_and_generate_review(files, question, include_tables):
        """Save the uploads, generate the review, then delete the temp files.

        Args:
            files: Uploaded PDFs as delivered by the ``pdf_files`` component.
            question: Review question or topic entered by the user.
            include_tables: Whether the review should include tables.

        Returns:
            The generated review, or a message explaining what went wrong.
        """
        if not files:
            return "Please upload at least one PDF file."

        saved_paths = save_uploaded_files(files)
        try:
            return generate_systematic_review(saved_paths, question, include_tables)
        finally:
            # Always remove the temporary copies, even if generation raised.
            for path in saved_paths:
                try:
                    os.remove(path)
                except OSError:
                    # Best-effort cleanup: a file that is already gone or
                    # locked must not hide the review result.
                    pass

    generate_button.click(
        process_files_and_generate_review,
        inputs=[pdf_files, review_question, include_tables],
        outputs=[review_output],
    )

# Launch the app
if __name__ == "__main__":
    # share=True asks Gradio to create a public share link for the app.
    demo.launch(share=True)