Reserch / app.py
mike23415's picture
Update app.py
a3b8412 verified
raw
history blame
4.57 kB
import os
import tempfile

# Point every Hugging Face cache location at a directory inside the app image.
# These assignments must run BEFORE `transformers` is imported: the Hugging
# Face libraries resolve their cache paths from the environment at import
# time, so setting the variables afterwards can leave the defaults in effect.
os.environ['TRANSFORMERS_CACHE'] = '/app/.cache'
os.environ['HF_DATASETS_CACHE'] = '/app/.cache'
os.environ['XDG_CACHE_HOME'] = '/app/.cache'
os.environ['HF_HOME'] = '/app/.cache'

# Third-party imports deliberately follow the environment setup above.
import jinja2  # noqa: E402
import pdfkit  # noqa: E402
import torch  # noqa: E402
from flask import Flask, request, send_file, jsonify  # noqa: E402
from flask_cors import CORS  # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # noqa: E402

# Flask application object; CORS(app) applies flask_cors' default
# cross-origin handling to the whole app.
app = Flask(__name__)
CORS(app)
# Build the shared text-generation pipeline once, at import time.
# `generator` starts out as None and is only replaced when the model, the
# tokenizer and the pipeline were all created successfully; any failure is
# printed and leaves it as None, which format_content() treats as
# "model unavailable".
generator = None
try:
    model = AutoModelForCausalLM.from_pretrained(
        "gpt2-medium", from_tf=False, use_safetensors=True
    )
    tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")
    generator = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        # Device index 0 when CUDA is available, otherwise -1.
        device=0 if torch.cuda.is_available() else -1,
    )
except Exception as e:
    print(f"Model loading failed: {str(e)}")
# Jinja2 source for the HTML page that is converted to PDF.
#
# Context variables read by the template:
#   title      - used for both <title> and the page heading
#   authors    - iterable of objects with `name` and optional `institution`
#                and `email`
#   abstract   - text placed under the "Abstract" heading
#   keywords   - already-joined keyword string
#   sections   - iterable of objects with `title` and `content`, laid out in
#                the two-column body
#   references - iterable of reference strings, numbered from 1
#
# The template contains no escaping filters of its own, so whether values are
# HTML-escaped depends on how the caller constructs the Jinja2 template.
IEEE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>{{ title }}</title>
<style>
@page { margin: 0.75in; }
body {
font-family: 'Times New Roman', Times, serif;
font-size: 12pt;
line-height: 1.5;
}
.header {
text-align: center;
margin-bottom: 24pt;
}
.two-column {
column-count: 2;
column-gap: 0.5in;
}
h1 { font-size: 14pt; margin: 12pt 0; }
h2 { font-size: 12pt; margin: 12pt 0 6pt 0; }
.abstract { margin-bottom: 24pt; }
.keywords { font-weight: bold; margin: 12pt 0; }
.references { margin-top: 24pt; }
.reference-item { text-indent: -0.5in; padding-left: 0.5in; }
</style>
</head>
<body>
<div class="header">
<h1>{{ title }}</h1>
<div class="author-info">
{% for author in authors %}
{{ author.name }}<br>
{% if author.institution %}{{ author.institution }}<br>{% endif %}
{% if author.email %}Email: {{ author.email }}{% endif %}
{% if not loop.last %}<br>{% endif %}
{% endfor %}
</div>
</div>
<div class="abstract">
<h2>Abstract</h2>
{{ abstract }}
<div class="keywords">Keywords— {{ keywords }}</div>
</div>
<div class="two-column">
{% for section in sections %}
<h2>{{ section.title }}</h2>
{{ section.content }}
{% endfor %}
</div>
<div class="references">
<h2>References</h2>
{% for ref in references %}
<div class="reference-item">[{{ loop.index }}] {{ ref }}</div>
{% endfor %}
</div>
</body>
</html>
"""
def format_content(content):
    """Run free-text research content through the text-generation model.

    Args:
        content: The material to format. Only plain strings are sent to the
            model. Any other value, such as the dict of
            abstract/keywords/sections/references that ``generate_pdf``
            passes, is returned unchanged: the model produces a single
            free-text string, which cannot be mapped back onto the keys the
            caller reads with ``.get(...)``.

    Returns:
        For string input, the ``generated_text`` of the first generated
        sequence, or the original string when the model is unavailable or
        generation fails. For non-string input, ``content`` itself.
    """
    # Structured input keeps its shape. Returning a generated string here
    # made the caller's ``formatted.get(...)`` raise AttributeError.
    if not isinstance(content, str):
        return content

    if generator is None:
        return content  # Fallback if model failed to load

    try:
        prompt = f"Format this research content to IEEE standards:\n{content}"
        outputs = generator(
            prompt,
            max_length=1024,
            num_return_sequences=1,
            clean_up_tokenization_spaces=True,
        )
        return outputs[0]['generated_text']
    except Exception as e:
        # Deliberate best-effort: a failed generation must not fail the
        # request, so report it and hand back the unformatted text.
        print(f"Formatting failed: {str(e)}")
        return content
@app.route('/generate', methods=['POST'])
def generate_pdf():
    """Render the posted paper data as an IEEE-style PDF.

    Expects a JSON object with required ``title`` and ``authors`` keys and an
    optional ``content`` object whose ``abstract``, ``keywords``,
    ``sections`` and ``references`` entries fill ``IEEE_TEMPLATE``.

    Returns:
        The PDF (``application/pdf``) on success. A JSON ``{"error": ...}``
        body with status 400 when the request body is not a JSON object or a
        required key is missing, or with status 500 when rendering or PDF
        conversion fails.
    """
    # Function-scope import so this block needs no new file-level import.
    import io

    try:
        # silent=True returns None for a missing or malformed JSON body, so
        # bad input is reported as 400 below rather than being caught by the
        # generic handler and reported as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'title' not in data or 'authors' not in data:
            return jsonify({"error": "Missing required fields"}), 400

        # Format content using AI
        content = data.get('content', {})
        formatted = format_content(content)
        if not isinstance(formatted, dict):
            # The formatter may hand back free text; the template needs the
            # structured fields, so fall back to what the client sent.
            formatted = content if isinstance(content, dict) else {}

        # Accept either a ready-made keyword string or an iterable of
        # keywords; joining a plain string would split it into characters.
        keywords = formatted.get('keywords') or []
        if not isinstance(keywords, str):
            keywords = ', '.join(str(keyword) for keyword in keywords)

        # Generate HTML. Autoescaping is enabled because every value comes
        # from the request body and would otherwise be injected as raw HTML
        # into the page handed to the PDF converter.
        template = jinja2.Environment(autoescape=True).from_string(IEEE_TEMPLATE)
        html = template.render(
            title=data['title'],
            authors=data['authors'],
            abstract=formatted.get('abstract', ''),
            keywords=keywords,
            sections=formatted.get('sections', []),
            references=formatted.get('references', [])
        )

        # PDF options
        options = {
            'page-size': 'Letter',
            'margin-top': '0.75in',
            'margin-right': '0.75in',
            'margin-bottom': '0.75in',
            'margin-left': '0.75in',
            'encoding': 'UTF-8',
            'quiet': ''
        }

        # Create the PDF in memory (output path False makes pdfkit return the
        # bytes). The previous NamedTemporaryFile(delete=False) was never
        # removed, leaving one file on disk per request.
        pdf_bytes = pdfkit.from_string(html, False, options=options)
        return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
    except Exception as e:
        # Top-level request boundary: report any failure as a JSON 500.
        return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
    # Direct-execution entry point: start Flask's built-in server on
    # port 5000, bound to 0.0.0.0.
    app.run(port=5000, host='0.0.0.0')