File size: 684 Bytes
5dec17e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class Resume:
    """Text of a resume plus the data derived from it.

    The first four fields are required and may be passed positionally.
    ``formatting`` and ``metadata`` are optional and default to ``None``
    so that existing four-argument constructions keep working; they are
    filled in by :meth:`from_docx`.
    """

    raw_text: str
    extracted_keywords: list
    keyterms: list
    entities: list
    # Formatting captured while parsing; read back by ``to_docx``.
    formatting: Optional[Dict] = None
    # Result of ``extract_metadata`` on the raw text, when parsed from a file.
    metadata: Optional[Dict] = None

    @classmethod
    def from_docx(cls, file_path: str) -> "Resume":
        """Parse while preserving formatting.

        Args:
            file_path: Path handed to ``parse_docx_with_formatting``.

        Returns:
            A ``Resume`` holding the parsed text, its formatting and its
            metadata. The keyword, key-term and entity lists start empty
            because this method does not compute them.

        NOTE(review): ``parse_docx_with_formatting`` and ``extract_metadata``
        are not defined in the visible part of this module; they must be in
        scope at call time -- confirm where they come from.
        """
        text, formatting = parse_docx_with_formatting(file_path)
        # Keyword arguments keep each value bound to the intended field even
        # if the field order changes later.
        return cls(
            raw_text=text,
            extracted_keywords=[],
            keyterms=[],
            entities=[],
            formatting=formatting,
            metadata=extract_metadata(text),
        )

    def to_docx(self, output_path: str) -> None:
        """Export with original formatting.

        Args:
            output_path: Destination handed to ``apply_formatting``.

        Raises:
            ValueError: If this instance carries no formatting (for example
                it was not created through ``from_docx``).

        NOTE(review): ``apply_formatting`` is not defined in the visible part
        of this module; it must be in scope at call time.
        """
        if self.formatting is None:
            raise ValueError(
                "Resume has no formatting to apply; create it with "
                "Resume.from_docx() or set 'formatting' first"
            )
        apply_formatting(self.raw_text, self.formatting, output_path)