import os

# The Hugging Face libraries read their cache-location variables when they are
# imported, so these assignments have to run before `transformers` is imported
# below. Setting them after the import leaves the default cache path in use.
os.environ['TRANSFORMERS_CACHE'] = '/app/.cache'
os.environ['HF_DATASETS_CACHE'] = '/app/.cache'
os.environ['XDG_CACHE_HOME'] = '/app/.cache'

import tempfile  # noqa: E402

import jinja2  # noqa: E402
import pdfkit  # noqa: E402
import torch  # noqa: E402
from flask import Flask, request, send_file, jsonify  # noqa: E402
from flask_cors import CORS  # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # noqa: E402


# Application object; the route decorator further down registers on it.
app = Flask(__name__)

# Wrap the app with flask-cors using its default settings.
CORS(app)


# Build the GPT-2 (medium) text-generation pipeline once, at import time.
# If any step raises, the error is printed and `generator` is left as None,
# which format_content() checks before attempting to use the model.
try:
    model = AutoModelForCausalLM.from_pretrained(
        "gpt2-medium", from_tf=False, use_safetensors=True
    )
    tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")
    generator = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        # Device index 0 when CUDA is available, otherwise -1.
        device=0 if torch.cuda.is_available() else -1,
    )
except Exception as load_error:
    print(f"Model loading failed: {load_error}")
    generator = None


# Jinja2 template for the generated paper. Variables used by the template:
#   title       - rendered in <title> and the page heading
#   authors     - iterable of objects/dicts with `name` and optional
#                 `institution` / `email`
#   abstract    - abstract text
#   keywords    - already-joined keyword string
#   sections    - iterable of objects/dicts with `title` and `content`
#   references  - iterable of reference strings, numbered from 1
#
# The two-column rule carries the `-webkit-` prefixed properties next to the
# standard ones. NOTE(review): the WebKit build embedded in wkhtmltopdf is
# understood to honour only the prefixed form - confirm against the deployed
# wkhtmltopdf version.
IEEE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{{ title }}</title>
    <style>
        @page { margin: 0.75in; }
        body {
            font-family: 'Times New Roman', Times, serif;
            font-size: 12pt;
            line-height: 1.5;
        }
        .header {
            text-align: center;
            margin-bottom: 24pt;
        }
        .two-column {
            -webkit-column-count: 2;
            column-count: 2;
            -webkit-column-gap: 0.5in;
            column-gap: 0.5in;
        }
        h1 { font-size: 14pt; margin: 12pt 0; }
        h2 { font-size: 12pt; margin: 12pt 0 6pt 0; }
        .abstract { margin-bottom: 24pt; }
        .keywords { font-weight: bold; margin: 12pt 0; }
        .references { margin-top: 24pt; }
        .reference-item { text-indent: -0.5in; padding-left: 0.5in; }
    </style>
</head>
<body>
    <div class="header">
        <h1>{{ title }}</h1>
        <div class="author-info">
            {% for author in authors %}
                {{ author.name }}<br>
                {% if author.institution %}{{ author.institution }}<br>{% endif %}
                {% if author.email %}Email: {{ author.email }}{% endif %}
                {% if not loop.last %}<br>{% endif %}
            {% endfor %}
        </div>
    </div>

    <div class="abstract">
        <h2>Abstract</h2>
        {{ abstract }}
        <div class="keywords">Keywords— {{ keywords }}</div>
    </div>

    <div class="two-column">
        {% for section in sections %}
            <h2>{{ section.title }}</h2>
            {{ section.content }}
        {% endfor %}
    </div>

    <div class="references">
        <h2>References</h2>
        {% for ref in references %}
            <div class="reference-item">[{{ loop.index }}] {{ ref }}</div>
        {% endfor %}
    </div>
</body>
</html>
"""


def format_content(content):
    """Run free-text content through the text-generation model, if loaded.

    The result always has the same type as the input, so callers that read
    structured content with ``.get(...)`` keep working.

    Args:
        content: Paper content. A non-empty string is sent to the model;
            any other value (for example the dict posted to ``/generate``)
            is returned unchanged, because generated free text cannot be
            mapped back onto the fields of a structured payload.

    Returns:
        The generated text for string input, or ``content`` itself when no
        model is loaded, the input is not a non-empty string, generation
        fails, or the model produces nothing.
    """
    if not generator:
        return content

    # NOTE(review): structured content bypasses the model entirely. Formatting
    # individual fields would need an explicit per-field design - confirm the
    # intended behaviour with the owner of this service.
    if not isinstance(content, str) or not content.strip():
        return content

    prompt = f"Format this research content to IEEE standards:\n{content}"
    try:
        outputs = generator(
            prompt,
            max_length=1024,
            num_return_sequences=1,
            clean_up_tokenization_spaces=True,
            # Return only the continuation; the default echoes the prompt,
            # instruction line included, at the start of the text.
            return_full_text=False,
        )
        generated = outputs[0]['generated_text'].strip()
    except Exception as e:
        # Best effort: a generation failure must not break the request.
        print(f"Formatting failed: {str(e)}")
        return content

    return generated or content


@app.route('/generate', methods=['POST'])
def generate_pdf():
    """Render the posted paper description as an IEEE-style PDF.

    The request body must be a JSON object containing ``title`` and
    ``authors``. An optional ``content`` object may carry ``abstract``,
    ``keywords``, ``sections`` and ``references``.

    Returns:
        The PDF as an ``application/pdf`` response on success. A JSON body
        ``{"error": ...}`` with status 400 when the payload is not a JSON
        object or lacks a required field, or with status 500 when rendering
        or PDF conversion fails.
    """
    # Function-scope import: the module's import block does not provide `io`.
    import io

    # silent=True gives None for a missing or malformed JSON body instead of
    # raising, so client mistakes get the 400 below rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'title' not in data or 'authors' not in data:
        return jsonify({"error": "Missing required fields"}), 400

    try:
        content = data.get('content') or {}
        formatted = format_content(content)
        # The template fields are read from a mapping; if formatting returned
        # anything else, fall back to the submitted content.
        if not isinstance(formatted, dict):
            formatted = content if isinstance(content, dict) else {}

        keywords = formatted.get('keywords') or []
        if isinstance(keywords, str):
            # A bare string would otherwise be joined character by character.
            keywords = [keywords]

        # Every rendered value comes from the request, so escape it: the HTML
        # is handed to wkhtmltopdf, which would interpret injected markup.
        template = jinja2.Environment(autoescape=True).from_string(IEEE_TEMPLATE)
        html = template.render(
            title=data['title'],
            authors=data['authors'],
            abstract=formatted.get('abstract', ''),
            keywords=', '.join(str(keyword) for keyword in keywords),
            sections=formatted.get('sections', []),
            references=formatted.get('references', []),
        )

        options = {
            'page-size': 'Letter',
            'margin-top': '0.75in',
            'margin-right': '0.75in',
            'margin-bottom': '0.75in',
            'margin-left': '0.75in',
            'encoding': 'UTF-8',
            'quiet': '',
        }

        # Passing False as the output path makes pdfkit return the PDF bytes,
        # so no temporary file is left on disk after each request.
        pdf_bytes = pdfkit.from_string(html, False, options=options)
        return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
    except Exception as e:
        app.logger.exception("PDF generation failed")
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # Direct execution: serve on port 5000, bound to all interfaces.
    app.run(port=5000, host='0.0.0.0')