# ats-optimizer / utils / file_handlers.py
# CapProj's picture
# Upload 25 files
# 5dec17e verified
import os
from docx import Document
from pdfminer.high_level import extract_text
from pathlib import Path
from tempfile import NamedTemporaryFile
import uuid
class FileHandler:
    """Static helpers for reading resume files, storing uploads, and saving output.

    All methods are static; the class is used purely as a namespace.
    """

    @staticmethod
    def read_file(file_path: str) -> str:
        """Return the text content of a ``.docx``, ``.pdf``, or plain-text file.

        The extension check is case-insensitive, so ``resume.PDF`` is routed to
        the PDF extractor instead of being opened as text.

        Args:
            file_path: Path of the file to read.

        Returns:
            The extracted text.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If a plain-text file is not valid UTF-8.
        """
        lowered = os.fspath(file_path).lower()
        if lowered.endswith('.docx'):
            return FileHandler._read_docx(file_path)
        if lowered.endswith('.pdf'):
            return extract_text(file_path)
        # Explicit encoding keeps results independent of the platform locale;
        # "utf-8-sig" also drops a leading byte-order mark if one is present.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return f.read()

    @staticmethod
    def save_uploaded_file(uploaded_file, directory="temp_uploads"):
        """Save an uploaded file under ``directory`` with a unique name.

        Args:
            uploaded_file: Object exposing a ``name`` attribute and a
                ``getbuffer()`` method returning the file's bytes (the
                original docstring describes it as a Streamlit uploaded file).
            directory: Target directory; created (including missing parents)
                when it does not exist.

        Returns:
            The path of the saved file as a string, or ``None`` if saving
            failed (the error is printed).
        """
        try:
            target_dir = Path(directory)
            # parents=True so a nested directory such as "tmp/uploads" works.
            target_dir.mkdir(parents=True, exist_ok=True)
            # Keep only the original extension; the stem is a random hex id so
            # uploads never collide and the client-supplied name is not trusted.
            file_ext = Path(uploaded_file.name).suffix
            temp_file = target_dir / f"{uuid.uuid4().hex}{file_ext}"
            with open(temp_file, "wb") as f:
                f.write(uploaded_file.getbuffer())
            return str(temp_file)
        except Exception as e:
            # Deliberate best-effort boundary: callers check for None.
            print(f"Error saving file: {e}")
            return None

    @staticmethod
    def cleanup_temp_files(directory="temp_uploads"):
        """Remove the files directly inside ``directory`` (best effort).

        Subdirectories are left in place, and a failure to delete one entry is
        reported without preventing the remaining entries from being removed.

        Args:
            directory: Directory whose immediate files are deleted.
        """
        try:
            for entry in Path(directory).glob("*"):
                # unlink() cannot remove a directory; skipping it avoids an
                # error that would otherwise abort the whole cleanup.
                if entry.is_dir() and not entry.is_symlink():
                    continue
                try:
                    entry.unlink()
                except OSError as e:
                    print(f"Error cleaning files: {e}")
        except Exception as e:
            # Deliberate best-effort boundary: cleanup must never raise.
            print(f"Error cleaning files: {e}")

    @staticmethod
    def _read_docx(file_path: str) -> str:
        """Return the paragraph texts of a ``.docx`` file joined by newlines."""
        doc = Document(file_path)
        return '\n'.join(para.text for para in doc.paragraphs)

    @staticmethod
    def save_resume(resume_data: dict, output_path: str):
        """Write ``resume_data['content']`` to ``output_path``.

        Paths ending in ``.docx`` (case-insensitive) are written as a Word
        document; anything else is written as UTF-8 text.

        Args:
            resume_data: Mapping holding the resume text under ``'content'``.
            output_path: Destination file path.

        Raises:
            KeyError: If ``'content'`` is missing when writing plain text.
            OSError: If the destination cannot be written.
        """
        if os.fspath(output_path).lower().endswith('.docx'):
            FileHandler._save_as_docx(resume_data, output_path)
            return
        # Look the content up before opening the file so that a missing key
        # does not leave behind an empty, truncated output file.
        content = resume_data['content']
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

    @staticmethod
    def _save_as_docx(resume_data: dict, output_path: str):
        """Save the resume text as a ``.docx`` file, one paragraph per line.

        This mirrors ``_read_docx``, which joins paragraphs with newlines.
        Only plain text is written; formatting is not preserved.

        Args:
            resume_data: Mapping holding the resume text under ``'content'``;
                a missing or empty value produces an empty document.
            output_path: Destination ``.docx`` path.
        """
        doc = Document()
        text = resume_data.get('content') or ''
        for line in str(text).splitlines():
            doc.add_paragraph(line)
        # TODO: add formatting preservation logic here.
        doc.save(output_path)