shoaibmoghal committed on
Commit
286ac47
·
verified ·
1 Parent(s): 5a004ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -90
app.py CHANGED
@@ -1,90 +1,93 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- import json, re, io
3
- from llama_cpp import Llama
4
- from PyPDF2 import PdfReader
5
- from docx import Document
6
- import os
7
-
8
- # ✅ Define Model Path Inside Hugging Face Space
9
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10
- MODEL_PATH = os.path.join(BASE_DIR, "models", "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf")
11
-
12
- print(f"🔹 Loading Mistral 7B from: {MODEL_PATH} (This may take a while)")
13
-
14
- llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=0) # Set n_gpu_layers=0 for CPU if no GPU
15
- print("✅ Model loaded successfully!")
16
-
17
- app = FastAPI(title="Resume Parsing API")
18
-
19
- def extract_text_from_resume(uploaded_file):
20
- file_content = uploaded_file.file.read()
21
- file_stream = io.BytesIO(file_content)
22
-
23
- if uploaded_file.filename.endswith(".pdf"):
24
- reader = PdfReader(file_stream)
25
- return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
26
- elif uploaded_file.filename.endswith(".docx"):
27
- doc = Document(file_stream)
28
- return "\n".join([para.text for para in doc.paragraphs])
29
- return None
30
-
31
- def extract_email_phone(text):
32
- email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
33
- phone_pattern = r"\+?\d{1,3}?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
34
-
35
- email_match = re.search(email_pattern, text)
36
- phone_match = re.search(phone_pattern, text)
37
-
38
- return {
39
- "email": email_match.group() if email_match else "Email not found",
40
- "phone": phone_match.group() if phone_match else "Phone not found"
41
- }
42
-
43
- def analyze_resume(text):
44
- truncated_text = text[:3500] # Keep within context limit
45
-
46
- prompt = f"""
47
- Extract these details from the resume:
48
- 1. Full Name
49
- 2. Work Experience
50
- 3. Qualifications
51
- 4. List of Skills
52
-
53
- Resume Text: {truncated_text}
54
-
55
- Format response as JSON:
56
- {{
57
- "name": "Candidate Name",
58
- "experience": [
59
- {{
60
- "company": "Company Name",
61
- "role": "Job Title",
62
- "duration": "Start Date - End Date",
63
- "responsibilities": "Brief work responsibilities"
64
- }}
65
- ],
66
- "qualifications": "Degree, Certifications",
67
- "skills": ["List of skills"]
68
- }}
69
- """
70
-
71
- response = llm(prompt, max_tokens=700)
72
- output = response["choices"][0]["text"].strip()
73
- print("🔹 Raw LLaMA Output:\n", output)
74
-
75
- try:
76
- return json.loads(output)
77
- except json.JSONDecodeError:
78
- return {"error": "Failed to parse JSON", "raw_output": output}
79
-
80
- @app.post("/parse-resume/")
81
- async def parse_resume(file: UploadFile = File(...)):
82
- text = extract_text_from_resume(file)
83
- if not text:
84
- return {"error": "Unsupported file format"}
85
-
86
- extracted_info = extract_email_phone(text)
87
- llm_data = analyze_resume(text)
88
-
89
- extracted_info.update(llm_data)
90
- return {"success": True, "data": extracted_info}
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ import json, re, io
3
+ from llama_cpp import Llama
4
+ from PyPDF2 import PdfReader
5
+ from docx import Document
6
+ import os
7
+
8
+ # ✅ Load Mistral 7B Model from Hugging Face Model Hub
9
+ MODEL_PATH = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
10
+
11
+ print(f"🔹 Loading Mistral 7B from Hugging Face Model Hub: {MODEL_PATH} (This may take a while)")
12
+
13
+ llm = Llama.from_pretrained(MODEL_PATH, n_ctx=4096, n_gpu_layers=-1) # Use GPU if available
14
+ print("✅ Model loaded successfully!")
15
+
16
+ app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
17
+
18
+ # ✅ Extract Text from PDF or DOCX
19
+ def extract_text_from_resume(uploaded_file):
20
+ file_content = uploaded_file.file.read()
21
+ file_stream = io.BytesIO(file_content)
22
+
23
+ if uploaded_file.filename.endswith(".pdf"):
24
+ reader = PdfReader(file_stream)
25
+ return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
26
+ elif uploaded_file.filename.endswith(".docx"):
27
+ doc = Document(file_stream)
28
+ return "\n".join([para.text for para in doc.paragraphs])
29
+ return None
30
+
31
+ # ✅ Extract Email & Phone Number
32
+ def extract_email_phone(text):
33
+ email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
34
+ phone_pattern = r"\+?\d{1,3}?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
35
+
36
+ email_match = re.search(email_pattern, text)
37
+ phone_match = re.search(phone_pattern, text)
38
+
39
+ return {
40
+ "email": email_match.group() if email_match else "Email not found",
41
+ "phone": phone_match.group() if phone_match else "Phone not found"
42
+ }
43
+
44
+ # Analyze Resume using Mistral 7B
45
+ def analyze_resume(text):
46
+ truncated_text = text[:3500] # Keep within context limit
47
+
48
+ prompt = f"""
49
+ Extract these details from the resume:
50
+ 1. Full Name
51
+ 2. Work Experience (Company Names, Roles, Responsibilities, Duration)
52
+ 3. Qualifications (Degrees, Certifications)
53
+ 4. List of Skills
54
+
55
+ Resume Text: {truncated_text}
56
+
57
+ Format response as a **strict JSON object**:
58
+ {{
59
+ "name": "Candidate Name",
60
+ "experience": [
61
+ {{
62
+ "company": "Company Name",
63
+ "role": "Job Title",
64
+ "duration": "Start Date - End Date",
65
+ "responsibilities": "Brief work responsibilities"
66
+ }}
67
+ ],
68
+ "qualifications": "Degree, Certifications",
69
+ "skills": ["List of skills"]
70
+ }}
71
+ """
72
+
73
+ response = llm(prompt, max_tokens=700)
74
+ output = response["choices"][0]["text"].strip()
75
+ print("🔹 Raw LLaMA Output:\n", output)
76
+
77
+ try:
78
+ return json.loads(output)
79
+ except json.JSONDecodeError:
80
+ return {"error": "Failed to parse LLaMA output", "raw_output": output}
81
+
82
+ # FastAPI Route to Parse Resume
83
+ @app.post("/parse-resume/")
84
+ async def parse_resume(file: UploadFile = File(...)):
85
+ text = extract_text_from_resume(file)
86
+ if not text:
87
+ return {"error": "Unsupported file format or could not extract text"}
88
+
89
+ extracted_info = extract_email_phone(text)
90
+ llm_data = analyze_resume(text)
91
+
92
+ extracted_info.update(llm_data)
93
+ return {"success": True, "data": extracted_info}