Spaces:

shoaibmoghal
/

resume-parser-fastapi

Sleeping

App Files Files Community

shoaibmoghal committed on Feb 14

Commit

286ac47

verified ·

1 Parent(s): 5a004ef

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -90

app.py CHANGED Viewed

@@ -1,90 +1,93 @@
-from fastapi import FastAPI, UploadFile, File
-import json, re, io
-from llama_cpp import Llama
-from PyPDF2 import PdfReader
-from docx import Document
-import os
-# ✅ Define Model Path Inside Hugging Face Space
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MODEL_PATH = os.path.join(BASE_DIR, "models", "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf")
-print(f"🔹 Loading Mistral 7B from: {MODEL_PATH} (This may take a while)")
-llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=0)  # Set n_gpu_layers=0 for CPU if no GPU
-print("✅ Model loaded successfully!")
-app = FastAPI(title="Resume Parsing API")
-def extract_text_from_resume(uploaded_file):
-    file_content = uploaded_file.file.read()
-    file_stream = io.BytesIO(file_content)
-    if uploaded_file.filename.endswith(".pdf"):
-        reader = PdfReader(file_stream)
-        return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
-    elif uploaded_file.filename.endswith(".docx"):
-        doc = Document(file_stream)
-        return "\n".join([para.text for para in doc.paragraphs])
-    return None
-def extract_email_phone(text):
-    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
-    phone_pattern = r"\+?\d{1,3}?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
-    email_match = re.search(email_pattern, text)
-    phone_match = re.search(phone_pattern, text)
-    return {
-        "email": email_match.group() if email_match else "Email not found",
-        "phone": phone_match.group() if phone_match else "Phone not found"
-    }
-def analyze_resume(text):
-    truncated_text = text[:3500]  # Keep within context limit
-    prompt = f"""
-    Extract these details from the resume:
-    1. Full Name
-    2. Work Experience
-    3. Qualifications
-    4. List of Skills
-    Resume Text: {truncated_text}
-    Format response as JSON:
-    {{
-        "name": "Candidate Name",
-        "experience": [
-            {{
-                "company": "Company Name",
-                "role": "Job Title",
-                "duration": "Start Date - End Date",
-                "responsibilities": "Brief work responsibilities"
-            }}
-        ],
-        "qualifications": "Degree, Certifications",
-        "skills": ["List of skills"]
-    }}
-    """
-    response = llm(prompt, max_tokens=700)
-    output = response["choices"][0]["text"].strip()
-    print("🔹 Raw LLaMA Output:\n", output)
-    try:
-        return json.loads(output)
-    except json.JSONDecodeError:
-        return {"error": "Failed to parse JSON", "raw_output": output}
-@app.post("/parse-resume/")
-async def parse_resume(file: UploadFile = File(...)):
-    text = extract_text_from_resume(file)
-    if not text:
-        return {"error": "Unsupported file format"}
-    extracted_info = extract_email_phone(text)
-    llm_data = analyze_resume(text)
-    extracted_info.update(llm_data)
-    return {"success": True, "data": extracted_info}

+from fastapi import FastAPI, UploadFile, File
+import json, re, io
+from llama_cpp import Llama
+from PyPDF2 import PdfReader
+from docx import Document
+import os
+# ✅ Load Mistral 7B Model from Hugging Face Model Hub
+# NOTE: MODEL_PATH holds a Hub repo id rather than a filesystem path; the name
+# is kept so anything that already refers to it keeps working.
+MODEL_PATH = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
+# from_pretrained must be told which GGUF file inside the repo to fetch.
+# Q5_K_M is the quantisation this app previously loaded from its local
+# models/ directory.
+MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf"
+print(f"🔹 Loading Mistral 7B from Hugging Face Model Hub: {MODEL_PATH} (This may take a while)")
+llm = Llama.from_pretrained(
+    repo_id=MODEL_PATH,
+    filename=MODEL_FILE,
+    n_ctx=4096,
+    n_gpu_layers=-1,  # Use GPU if available
+)
+print("✅ Model loaded successfully!")
+app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
+# ✅ Extract Text from PDF or DOCX
+def extract_text_from_resume(uploaded_file):
+    """Return the plain text of an uploaded PDF or DOCX resume.
+
+    Args:
+        uploaded_file: Upload object exposing ``filename`` and a binary
+            ``file`` stream (the route passes its ``UploadFile`` here).
+
+    Returns:
+        The extracted text joined with newlines, or ``None`` when the file
+        name is missing or does not end in ``.pdf`` / ``.docx``.
+    """
+    # The client controls the name: it can be absent and can use any casing.
+    filename = (uploaded_file.filename or "").lower()
+    if not filename.endswith((".pdf", ".docx")):
+        return None
+    file_stream = io.BytesIO(uploaded_file.file.read())
+    if filename.endswith(".pdf"):
+        # Extract every page exactly once; pages with empty output are skipped.
+        page_texts = (page.extract_text() for page in PdfReader(file_stream).pages)
+        return "\n".join(page_text for page_text in page_texts if page_text)
+    return "\n".join(para.text for para in Document(file_stream).paragraphs)
+# ✅ Extract Email & Phone Number
+def extract_email_phone(text):
+    """Find the first email address and phone number in ``text``.
+
+    Args:
+        text: Resume text to scan.
+
+    Returns:
+        A dict with keys ``"email"`` and ``"phone"``. A field that cannot be
+        found holds ``"Email not found"`` / ``"Phone not found"`` instead.
+    """
+    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
+    # The country code is an optional prefix group. Writing it as ``\d{1,3}?``
+    # only makes the quantifier lazy and still demands a leading digit run,
+    # which rejects numbers such as "555-123-4567" or "(555) 123-4567".
+    phone_pattern = r"(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
+    email_match = re.search(email_pattern, text)
+    phone_match = re.search(phone_pattern, text)
+    return {
+        "email": email_match.group() if email_match else "Email not found",
+        "phone": phone_match.group() if phone_match else "Phone not found",
+    }
+# ✅ Analyze Resume using Mistral 7B
+def _first_json_object(raw):
+    """Return the first JSON object embedded in ``raw``, or ``None`` if there is none."""
+    decoder = json.JSONDecoder()
+    start = raw.find("{")
+    while start != -1:
+        try:
+            # raw_decode parses from ``start`` and ignores any trailing text.
+            parsed, _ = decoder.raw_decode(raw, start)
+        except json.JSONDecodeError:
+            start = raw.find("{", start + 1)
+        else:
+            return parsed
+    return None
+def analyze_resume(text):
+    """Ask the model for structured resume details and parse its JSON reply.
+
+    Only the first 3500 characters of ``text`` are placed in the prompt, and
+    the completion is limited to 700 tokens. The raw completion is printed.
+
+    Args:
+        text: Plain text of the resume.
+
+    Returns:
+        The JSON object produced by the model as a dict, or
+        ``{"error": ..., "raw_output": ...}`` when the completion contains no
+        parseable JSON object.
+    """
+    truncated_text = text[:3500]  # Keep within context limit
+    prompt = f"""
+    Extract these details from the resume:
+    1. Full Name
+    2. Work Experience (Company Names, Roles, Responsibilities, Duration)
+    3. Qualifications (Degrees, Certifications)
+    4. List of Skills
+    Resume Text: {truncated_text}
+    Format response as a **strict JSON object**:
+    {{
+        "name": "Candidate Name",
+        "experience": [
+            {{
+                "company": "Company Name",
+                "role": "Job Title",
+                "duration": "Start Date - End Date",
+                "responsibilities": "Brief work responsibilities"
+            }}
+        ],
+        "qualifications": "Degree, Certifications",
+        "skills": ["List of skills"]
+    }}
+    """
+    response = llm(prompt, max_tokens=700)
+    output = response["choices"][0]["text"].strip()
+    print("🔹 Raw LLaMA Output:\n", output)
+    # The completion may surround the JSON with prose or code fences, so pull
+    # out the object itself. Always returning a dict keeps the route's
+    # dict.update() call safe.
+    parsed = _first_json_object(output)
+    if parsed is None:
+        return {"error": "Failed to parse LLaMA output", "raw_output": output}
+    return parsed
+# ✅ FastAPI Route to Parse Resume
+@app.post("/parse-resume/")
+async def parse_resume(file: UploadFile = File(...)):
+    """Parse an uploaded resume and return its extracted details.
+
+    Args:
+        file: The uploaded resume.
+
+    Returns:
+        ``{"success": True, "data": {...}}`` with the regex-extracted contact
+        fields merged with the model's fields, or ``{"error": ...}`` when no
+        text could be extracted from the upload.
+    """
+    resume_text = extract_text_from_resume(file)
+    if resume_text:
+        # Contact fields come first; model fields are merged on top, so a
+        # key present in both takes the model's value.
+        payload = extract_email_phone(resume_text)
+        payload.update(analyze_resume(resume_text))
+        return {"success": True, "data": payload}
+    return {"error": "Unsupported file format or could not extract text"}