Spaces:
Running
Running
import os | |
from docx import Document | |
from pdfminer.high_level import extract_text | |
from pathlib import Path | |
from tempfile import NamedTemporaryFile | |
import uuid | |
class FileHandler:
    """Read, store, and write resume files (.docx, .pdf, or plain text).

    None of the methods reads instance or class state, so all of them are
    static methods: they can be called on the class (``FileHandler.read_file``)
    or on an instance.
    """

    @staticmethod
    def read_file(file_path: str) -> str:
        """Return the text content of the file at ``file_path``.

        The reader is chosen from the file extension, compared
        case-insensitively: ``.docx`` goes through ``_read_docx``, ``.pdf``
        through ``extract_text``, and anything else is read as UTF-8 text.
        """
        lowered = str(file_path).lower()
        if lowered.endswith('.docx'):
            return FileHandler._read_docx(file_path)
        if lowered.endswith('.pdf'):
            return extract_text(file_path)
        # Explicit encoding so the result does not depend on the platform's
        # default locale; matches what save_resume writes.
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    @staticmethod
    def save_uploaded_file(uploaded_file, directory="temp_uploads"):
        """Save a Streamlit uploaded file into ``directory``.

        ``uploaded_file`` must expose ``.name`` and ``.getbuffer()``. Only the
        extension of ``.name`` is reused; the stem is replaced by a random
        UUID so concurrent uploads cannot collide and the client-supplied
        file name never ends up in the path.

        Returns the saved path as a string, or ``None`` if saving failed (the
        error is printed, not raised).
        """
        try:
            target_dir = Path(directory)
            # parents=True so a nested directory such as "tmp/uploads" works.
            target_dir.mkdir(parents=True, exist_ok=True)
            file_ext = Path(uploaded_file.name).suffix
            temp_file = target_dir / f"{uuid.uuid4().hex}{file_ext}"
            with open(temp_file, "wb") as f:
                f.write(uploaded_file.getbuffer())
            return str(temp_file)
        except Exception as e:
            # Deliberate best-effort boundary: callers check for None.
            print(f"Error saving file: {e}")
            return None

    @staticmethod
    def cleanup_temp_files(directory="temp_uploads"):
        """Remove the files directly inside ``directory``.

        Sub-directories are left in place (``unlink`` cannot remove them).
        A file that cannot be deleted is reported and skipped, so one failure
        does not stop the remaining files from being cleaned up. Never raises;
        a missing directory is simply a no-op.
        """
        try:
            for entry in Path(directory).glob("*"):
                if entry.is_dir() and not entry.is_symlink():
                    continue
                try:
                    entry.unlink()
                except OSError as e:
                    print(f"Error cleaning files: {e}")
        except Exception as e:
            print(f"Error cleaning files: {e}")

    @staticmethod
    def _read_docx(file_path: str) -> str:
        """Return the paragraphs of a .docx file joined by newlines."""
        doc = Document(file_path)
        return '\n'.join(para.text for para in doc.paragraphs)

    @staticmethod
    def save_resume(resume_data: dict, output_path: str):
        """Write ``resume_data['content']`` to ``output_path``.

        A ``.docx`` extension (case-insensitive) produces a Word document via
        ``_save_as_docx``; any other extension produces a UTF-8 text file.
        """
        if str(output_path).lower().endswith('.docx'):
            FileHandler._save_as_docx(resume_data, output_path)
        else:
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(resume_data['content'])

    @staticmethod
    def _save_as_docx(resume_data: dict, output_path: str):
        """Save the resume content as a .docx file, one paragraph per line.

        This is the inverse of ``_read_docx``, which joins paragraphs with
        newlines. A missing or empty ``'content'`` entry yields an empty
        document. Only plain text is written.
        """
        doc = Document()
        content = resume_data.get('content') or ''
        for line in content.splitlines():
            doc.add_paragraph(line)
        # TODO: add formatting preservation logic here.
        doc.save(output_path)