shoaibmoghal committed
Commit 92563a0 · verified · 1 parent: cd4bd3b

Update app.py

Files changed (1): app.py (+17 -23)
app.py CHANGED
@@ -1,29 +1,23 @@
  from fastapi import FastAPI, UploadFile, File
- import json, re, io, os, requests
+ import json, re, io, os
  from llama_cpp import Llama
  from PyPDF2 import PdfReader
  from docx import Document
+ from huggingface_hub import hf_hub_download

- # ✅ Define model URL and path
- MODEL_URL = "https://huggingface.co/TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF/resolve/main/capybarahermes-2.5-mistral-7b.Q5_K_M.gguf"
- MODEL_DIR = "/tmp/models"  # ✅ Use `/tmp`, which is writable
- MODEL_PATH = os.path.join(MODEL_DIR, "mistral-7b.Q5_K_M.gguf")
-
- # ✅ Ensure the models directory exists
- os.makedirs(MODEL_DIR, exist_ok=True)
-
- # Download the model if it is not already available
- if not os.path.exists(MODEL_PATH):
-     print(f"🔹 Downloading model from: {MODEL_URL}")
-     response = requests.get(MODEL_URL, stream=True)
-     with open(MODEL_PATH, "wb") as file:
-         for chunk in response.iter_content(chunk_size=8192):
-             file.write(chunk)
-     print("✅ Model downloaded successfully!")
-
- # ✅ Load Mistral 7B using llama_cpp
- print(f"🔹 Loading Mistral 7B from {MODEL_PATH} (This may take a while)")
- llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=-1)  # Use GPU if available
+ # ✅ Use a smaller quantized model for faster CPU inference
+ MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
+ MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"  # ✅ Q4_K_M instead of Q5_K_M
+ CACHE_DIR = "/tmp/hf_cache"  # ✅ Hugging Face cache to avoid re-downloading
+
+ # ✅ Resolve the model through the Hugging Face cache
+ MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+
+ print(f"Model cached at {MODEL_PATH}")
+
+ # Load Mistral 7B with optimized settings
+ print(f"🔹 Loading Mistral 7B (Q4_K_M) from {MODEL_PATH} (This may take a while)")
+ llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # Reduced context length; CPU only
  print("✅ Model loaded successfully!")

  app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
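
Note: the removed requests loop never checked the HTTP status, so an interrupted download could leave a truncated .gguf that the os.path.exists guard would then treat as complete. hf_hub_download sidesteps this class of bug: incomplete downloads are not promoted into the cache, and later calls simply return the cached local path without refetching. A minimal sketch of the caching behaviour this commit relies on (repo and file names copied from the diff; huggingface_hub assumed installed):

    from huggingface_hub import hf_hub_download

    # First call downloads into the cache; a repeat call with the same
    # arguments resolves to the already-cached file without a download.
    path = hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q4_K_M.gguf",
        cache_dir="/tmp/hf_cache",
    )
    path_again = hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q4_K_M.gguf",
        cache_dir="/tmp/hf_cache",
    )
    assert path == path_again  # second call hits the cache
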
@@ -56,7 +50,7 @@ def extract_email_phone(text):

  # ✅ Analyze Resume using Mistral 7B
  def analyze_resume(text):
-     truncated_text = text[:3500]  # Keep within context limit
+     truncated_text = text[:2048]  # Further reduced text size for faster processing

      prompt = f"""
      Extract these details from the resume:
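
Note: text[:2048] truncates by characters, while the n_ctx=2048 budget is measured in tokens and must also cover the prompt template plus the max_tokens completion. That works out here (2048 characters is typically around 500 tokens), but a token-aware cut is more precise. A hedged sketch using llama_cpp's own tokenizer (the helper name and the 1200-token budget are illustrative, not part of this commit):

    def truncate_to_tokens(llm, text, limit=1200):
        # Tokenize with the model's own vocabulary; the assumed budget
        # leaves room for the prompt template and the completion tokens.
        tokens = llm.tokenize(text.encode("utf-8"), add_bos=False)
        if len(tokens) <= limit:
            return text
        return llm.detokenize(tokens[:limit]).decode("utf-8", errors="ignore")
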
@@ -83,7 +77,7 @@ def analyze_resume(text):
      }}
      """

-     response = llm(prompt, max_tokens=700)
+     response = llm(prompt, max_tokens=500)  # ✅ Reduced max tokens for quicker response
      output = response["choices"][0]["text"].strip()
      print("🔹 Raw LLaMA Output:\n", output)

 
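Note: the diff ends at the raw-output print; the JSON parsing that presumably follows is outside these hunks. Since the model often wraps its JSON in extra prose, a common pattern is to grab the outermost braces before json.loads. A sketch of that pattern (the helper and its fallback behaviour are assumptions, not taken from app.py):

    import json, re

    def extract_json(output):
        # Greedy match from the first '{' to the last '}' so nested braces
        # inside the object are kept; return None if nothing parses.
        match = re.search(r"\{.*\}", output, re.DOTALL)
        if not match:
            return None
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            return None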