root committed
Commit 98627c1 · 1 parent: 276589d
Commit message: ss

Files changed:
- app.py (+53 -8)
- fix_dependencies.py (+7 -2)
- requirements.txt (+7 -2)
app.py
CHANGED
@@ -6,7 +6,41 @@ import re
 import pandas as pd
 import matplotlib.pyplot as plt
 from transformers import pipeline
-from sentence_transformers import SentenceTransformer, util
+# Import SentenceTransformer with try-except
+try:
+    from sentence_transformers import SentenceTransformer
+    # Try to import util; if it fails, we'll create our own minimal version
+    try:
+        from sentence_transformers import util
+    except ImportError:
+        # Create a minimal util module replacement with the functions we need
+        class util:
+            @staticmethod
+            def pytorch_cos_sim(a, b):
+                """
+                Compute cosine similarity between two PyTorch tensors
+                """
+                import torch
+                if not isinstance(a, torch.Tensor):
+                    a = torch.tensor(a)
+                if not isinstance(b, torch.Tensor):
+                    b = torch.tensor(b)
+
+                if len(a.shape) == 1:
+                    a = a.unsqueeze(0)
+                if len(b.shape) == 1:
+                    b = b.unsqueeze(0)
+
+                a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
+                b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
+                return torch.mm(a_norm, b_norm.transpose(0, 1))
+except ImportError:
+    st.error("Failed to import SentenceTransformer. Semantic matching will be disabled.")
+    SentenceTransformer = None
+    class util:
+        @staticmethod
+        def pytorch_cos_sim(*args, **kwargs):
+            return 0
 import subprocess
 import sys
 import torch
@@ -59,15 +93,26 @@ def download_spacy_model():
 # Load the NLP models
 @st.cache_resource
 def load_models():
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    nlp = download_spacy_model()
+    try:
+        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    except Exception as e:
+        st.error(f"Failed to load summarization model: {str(e)}")
+        # Fallback to a simpler summarizer that just takes the first few sentences
+        summarizer = lambda text, **kwargs: [{"summary_text": ". ".join(text.split(". ")[:3]) + "."}]
 
-    # Load sentence transformer for semantic matching
     try:
-        sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+        nlp = download_spacy_model()
     except Exception as e:
-        st.error(f"Failed to load sentence transformer: {str(e)}")
-        sentence_model = None
+        st.error(f"Failed to load spaCy model: {str(e)}")
+        nlp = None
+
+    # Load sentence transformer for semantic matching
+    sentence_model = None
+    if SentenceTransformer is not None:
+        try:
+            sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+        except Exception as e:
+            st.error(f"Failed to load sentence transformer: {str(e)}")
 
     return summarizer, nlp, sentence_model
 
@@ -461,7 +506,7 @@ def analyze_resume(text, job_title, sentence_model):
 
     # Semantic matching with job description
     semantic_score = 0
-    if sentence_model:
+    if sentence_model is not None and SentenceTransformer is not None:
         try:
             resume_embedding = sentence_model.encode(text[:5000])  # Limit to first 5000 chars to avoid memory issues
             job_embedding = sentence_model.encode(job_descriptions[job_title]["semantic_description"])
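
Note: the pytorch_cos_sim fallback added above is a standard cosine-similarity computation and can be sanity-checked in isolation. A minimal sketch, using made-up test vectors rather than real model embeddings, comparing it against torch's built-in cosine_similarity:

import torch

def pytorch_cos_sim(a, b):
    # Same logic as the fallback util.pytorch_cos_sim defined in the diff above
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(a)
    if not isinstance(b, torch.Tensor):
        b = torch.tensor(b)
    if len(a.shape) == 1:
        a = a.unsqueeze(0)
    if len(b.shape) == 1:
        b = b.unsqueeze(0)
    a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
    b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
    return torch.mm(a_norm, b_norm.transpose(0, 1))

a = torch.tensor([1.0, 0.0, 1.0])  # illustrative vectors, not real embeddings
b = torch.tensor([0.0, 1.0, 1.0])
print(pytorch_cos_sim(a, b))  # tensor([[0.5000]])
print(torch.nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0)))  # tensor([0.5000])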
fix_dependencies.py
CHANGED
@@ -13,7 +13,7 @@ def fix_dependencies():
     "pdfplumber==0.9.0",
     "spacy==3.5.0",
     "transformers==4.28.1",
-    "torch",
+    "torch==1.13.1",
     "huggingface-hub==0.14.1",
     "sentence-transformers==2.2.2",
     "nltk==3.8.1",
@@ -21,7 +21,12 @@ def fix_dependencies():
     "pandas==1.5.3",
     "numpy==1.24.3",
     "matplotlib==3.7.1",
-    "pydantic",
+    "pydantic==1.10.8",
+    "protobuf<4.0.0",
+    "tqdm>=4.27",
+    "regex>=2022.1.18",
+    "scikit-learn==1.0.2",
+    "scipy==1.8.1"
 ]
 
 # Install each package
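
The "# Install each package" loop itself falls outside this hunk, so its body is not shown. A typical shape for such a loop, assuming it installs into the running interpreter's environment (a sketch, not this commit's actual code):

import subprocess
import sys

packages = ["torch==1.13.1", "pydantic==1.10.8"]  # the real script uses the full pinned list above

for package in packages:
    # Invoke pip via the current interpreter so packages land in the same environment
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])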
requirements.txt
CHANGED
@@ -2,7 +2,7 @@ streamlit==1.22.0
 pdfplumber==0.9.0
 spacy==3.5.0
 transformers==4.28.1
-torch
+torch==1.13.1
 huggingface-hub==0.14.1
 sentence-transformers==2.2.2
 nltk==3.8.1
@@ -10,4 +10,9 @@ plotly==5.14.1
 pandas==1.5.3
 numpy==1.24.3
 matplotlib==3.7.1
-pydantic
+pydantic==1.10.8
+protobuf<4.0.0
+tqdm>=4.27
+regex>=2022.1.18
+scikit-learn==1.0.2
+scipy==1.8.1
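
These pins move together: sentence-transformers 2.2.2 relies on an older huggingface-hub API, which is presumably why huggingface-hub stays at 0.14.1 while torch and pydantic gain explicit versions. A quick way to confirm an environment actually matches the pins (a sketch using only packages from the list above):

import torch
import pydantic
import sentence_transformers

# Versions should match requirements.txt: 1.13.1, 1.10.8, 2.2.2
print(torch.__version__)
print(pydantic.VERSION)
print(sentence_transformers.__version__)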