shoaibmoghal committed
Commit 92563a0 · verified · 1 parent: cd4bd3b

Update app.py

Files changed (1): app.py (+17 -23)
app.py CHANGED
@@ -1,29 +1,23 @@
  from fastapi import FastAPI, UploadFile, File
- import json, re, io, os, requests
+ import json, re, io, os
  from llama_cpp import Llama
  from PyPDF2 import PdfReader
  from docx import Document
+ from huggingface_hub import hf_hub_download

- # ✅ Define model URL and path
- MODEL_URL = "https://huggingface.co/TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF/resolve/main/capybarahermes-2.5-mistral-7b.Q5_K_M.gguf"
- MODEL_DIR = "/tmp/models"  # ✅ Use `/tmp`, which is writable
- MODEL_PATH = os.path.join(MODEL_DIR, "mistral-7b.Q5_K_M.gguf")
-
- # ✅ Ensure the models directory exists
- os.makedirs(MODEL_DIR, exist_ok=True)
-
- # Download the model if it is not already available
- if not os.path.exists(MODEL_PATH):
-     print(f"🔹 Downloading model from: {MODEL_URL}")
-     response = requests.get(MODEL_URL, stream=True)
-     with open(MODEL_PATH, "wb") as file:
-         for chunk in response.iter_content(chunk_size=8192):
-             file.write(chunk)
-     print("✅ Model downloaded successfully!")
-
- # ✅ Load Mistral 7B using llama_cpp
- print(f"🔹 Loading Mistral 7B from {MODEL_PATH} (This may take a while)")
- llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=-1)  # Use GPU if available
+ # ✅ Use a smaller quantized model for faster CPU inference
+ MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
+ MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"  # ✅ Q4_K_M instead of Q5_K_M
+ CACHE_DIR = "/tmp/hf_cache"  # ✅ Hugging Face cache to avoid re-downloading
+
+ # ✅ Resolve the model through the Hugging Face cache
+ MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
+
+ print(f"Model cached at {MODEL_PATH}")
+
+ # Load Mistral 7B with optimized settings
+ print(f"🔹 Loading Mistral 7B (Q4_K_M) from {MODEL_PATH} (This may take a while)")
+ llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_gpu_layers=0)  # Reduced context length; CPU only
  print("✅ Model loaded successfully!")

  app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
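
Note: the removed requests loop never checked the HTTP status, so an interrupted download could leave a truncated .gguf that the os.path.exists guard would then treat as complete. hf_hub_download sidesteps this class of bug: incomplete downloads are not promoted into the cache, and later calls simply return the cached local path without refetching. A minimal sketch of the caching behaviour this commit relies on (repo and file names copied from the diff; huggingface_hub assumed installed):

    from huggingface_hub import hf_hub_download

    # First call downloads into the cache; a repeat call with the same
    # arguments resolves to the already-cached file without a download.
    path = hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q4_K_M.gguf",
        cache_dir="/tmp/hf_cache",
    )
    path_again = hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q4_K_M.gguf",
        cache_dir="/tmp/hf_cache",
    )
    assert path == path_again  # second call hits the cache
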
@@ -56,7 +50,7 @@ def extract_email_phone(text):

  # ✅ Analyze Resume using Mistral 7B
  def analyze_resume(text):
-     truncated_text = text[:3500]  # Keep within context limit
+     truncated_text = text[:2048]  # Further reduced text size for faster processing

      prompt = f"""
      Extract these details from the resume:
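
Note: text[:2048] truncates by characters, while the n_ctx=2048 budget is measured in tokens and must also cover the prompt template plus the max_tokens completion. That works out here (2048 characters is typically around 500 tokens), but a token-aware cut is more precise. A hedged sketch using llama_cpp's own tokenizer (the helper name and the 1200-token budget are illustrative, not part of this commit):

    def truncate_to_tokens(llm, text, limit=1200):
        # Tokenize with the model's own vocabulary; the assumed budget
        # leaves room for the prompt template and the completion tokens.
        tokens = llm.tokenize(text.encode("utf-8"), add_bos=False)
        if len(tokens) <= limit:
            return text
        return llm.detokenize(tokens[:limit]).decode("utf-8", errors="ignore")
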
@@ -83,7 +77,7 @@ def analyze_resume(text):
      }}
      """

-     response = llm(prompt, max_tokens=700)
+     response = llm(prompt, max_tokens=500)  # ✅ Reduced max tokens for quicker response
      output = response["choices"][0]["text"].strip()
      print("🔹 Raw LLaMA Output:\n", output)

 
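Note: the diff ends at the raw-output print; the JSON parsing that presumably follows is outside these hunks. Since the model often wraps its JSON in extra prose, a common pattern is to grab the outermost braces before json.loads. A sketch of that pattern (the helper and its fallback behaviour are assumptions, not taken from app.py):

    import json, re

    def extract_json(output):
        # Greedy match from the first '{' to the last '}' so nested braces
        # inside the object are kept; return None if nothing parses.
        match = re.search(r"\{.*\}", output, re.DOTALL)
        if not match:
            return None
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            return None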