|
import sys |
|
import os |
|
import json |
|
import shutil |
|
import re |
|
import gc |
|
import time |
|
from datetime import datetime |
|
from typing import List, Tuple, Dict, Union |
|
import pandas as pd |
|
import pdfplumber |
|
import gradio as gr |
|
import torch |
|
import matplotlib.pyplot as plt |
|
from fpdf import FPDF |
|
import unicodedata |
|
|
|
|
|
# Root of the persistent storage volume used for all caches and outputs.
persistent_dir = "/data/hf_cache"

# Derived cache/output locations, all living under the persistent root.
model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")

# Make sure every directory exists before anything tries to write into it.
for _dir in (model_cache_dir, tool_cache_dir, file_cache_dir, report_dir):
    os.makedirs(_dir, exist_ok=True)

# Point the Hugging Face libraries at the persistent model cache so model
# downloads survive container restarts.
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

# Make the bundled "src" directory importable — it provides the txagent
# package imported below.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)
|
|
|
from txagent.txagent import TxAgent |
|
|
|
# Hard limit on the model's total context window (prompt + generation).
MAX_MODEL_TOKENS = 131072

# Upper bound on tokens generated per model call.
MAX_NEW_TOKENS = 4096

# Maximum size of a single document chunk fed to the model.
MAX_CHUNK_TOKENS = 8192

# Chunks processed per batch; kept at 1 — presumably to bound GPU memory use
# (TODO confirm against the processing loop, which is outside this view).
BATCH_SIZE = 1

# Token budget reserved for the fixed prompt text wrapped around each chunk.
PROMPT_OVERHEAD = 300

# Pause in seconds between model calls — presumably a throttle to let
# memory/GPU pressure settle; verify against the caller.
SAFE_SLEEP = 0.5
|
|
|
def estimate_tokens(text: str) -> int:
    """Cheap token-count estimate: roughly one token per four characters.

    Always returns at least 1, even for an empty string.
    """
    return 1 + len(text) // 4
|
|
|
def clean_response(text: str) -> str:
    """Strip noise from model output and normalize blank lines.

    Removes any "[...]" span (non-greedy, matching across newlines via
    DOTALL) and standalone "None" words, collapses runs of three or more
    newlines down to a single blank line, then trims surrounding whitespace.
    """
    without_noise = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    squeezed = re.sub(r"\n{3,}", "\n\n", without_noise)
    return squeezed.strip()
|
|
|
def remove_duplicate_paragraphs(text: str) -> str:
    """Drop repeated paragraphs, keeping the first occurrence of each.

    Paragraphs are blocks separated by blank lines ("\n\n"); each is
    stripped of surrounding whitespace, empty ones are discarded, and the
    original order of the survivors is preserved.
    """
    stripped = (p.strip() for p in text.strip().split("\n\n"))
    # dict.fromkeys dedupes while preserving insertion order.
    ordered_unique = dict.fromkeys(p for p in stripped if p)
    return "\n\n".join(ordered_unique)
|
|
|
|
|
from fastapi import FastAPI, UploadFile, File |
|
from fastapi.responses import JSONResponse |
|
import uvicorn |
|
|
|
app = FastAPI()


@app.post("/analyze")
async def analyze_file_api(file: UploadFile = File(...)):
    """Analyze an uploaded report file and return its summary.

    Saves the upload into the local file cache, runs the report pipeline,
    and responds with the extracted summary text plus the path of the
    generated PDF, or a 400 error when processing fails.
    """
    agent = init_agent()
    # basename() guards against path traversal via a crafted filename
    # (e.g. "../../etc/passwd") coming from an untrusted client.
    safe_name = os.path.basename(file.filename or "upload")
    temp_file_path = os.path.join(file_cache_dir, safe_name)
    with open(temp_file_path, "wb") as out:
        out.write(await file.read())
    messages = []
    # The original code opened this handle without ever closing it; the
    # with-block guarantees it is released even if process_report raises.
    with open(temp_file_path, "rb") as report_file:
        messages, pdf_path = process_report(agent, report_file, messages)
    # messages[-2] is presumably the final assistant summary appended by
    # process_report — TODO confirm against its implementation. The length
    # guard turns a would-be IndexError into a clean 400 response.
    if pdf_path and len(messages) >= 2:
        return JSONResponse(content={"summary": messages[-2]['content'], "pdf": pdf_path})
    return JSONResponse(content={"error": "Processing failed."}, status_code=400)
|
|
|
|
|
if __name__ == "__main__":
    # Build the agent once and hand it to the Gradio UI factory.
    agent = init_agent()
    ui = create_ui(agent)
    import threading
    # Launch the Gradio UI (port 7860) in a background thread so the FastAPI
    # app below can own the main thread. allowed_paths lets Gradio serve the
    # generated PDF reports from the persistent cache directory.
    # NOTE(review): the thread is non-daemon, so the process will stay alive
    # for the UI even if uvicorn exits — confirm this is intended.
    threading.Thread(target=lambda: ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)).start()
    # Blocking call: serve the REST API (including /analyze) on port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
|