nlpblogs committed on
Commit
7c7aa59
·
verified ·
1 Parent(s): 7b149ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -36
app.py CHANGED
@@ -41,68 +41,61 @@ with st.sidebar:
41
 
42
 
43
 
44
- st.subheader ("Job Description", divider = "red")
45
-
46
- txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
47
  job = pd.Series(txt, name="Text")
48
 
49
- st.subheader("Candidate Profile 1", divider = "green")
50
-
51
-
52
  if 'upload_count' not in st.session_state:
53
  st.session_state['upload_count'] = 0
54
 
55
  max_attempts = 3
56
-
57
  if st.session_state['upload_count'] < max_attempts:
58
  uploaded_files = st.file_uploader(
59
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
60
  )
61
-
62
- if uploaded_files:
63
- st.session_state['upload_count'] += 1
64
- for uploaded_file in uploaded_files:
65
- pdf_reader = PdfReader(uploaded_file)
66
- text_data = ""
67
- for page in pdf_reader.pages:
68
- text_data += page.extract_text()
69
- data = pd.Series(text_data, name = 'Text')
70
  frames = [job, data]
71
  result = pd.concat(frames)
72
  model = GLiNER.from_pretrained("urchade/gliner_base")
73
- labels = ["person", "country","organization", "role", "skills", "year"]
74
  entities = model.predict_entities(text_data, labels)
75
  df = pd.DataFrame(entities)
76
-
77
  st.title("Profile of candidate 1")
78
  fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
79
- values='score', color='label')
80
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
81
- st.plotly_chart(fig, key = "figure 1")
82
-
83
-
84
  vectorizer = TfidfVectorizer()
85
  tfidf_matrix = vectorizer.fit_transform(result)
86
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
87
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
88
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
89
-
90
  st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
91
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
92
- x=['Resume 1', 'Jon Description'],
93
- y=['Resume 1', 'Job Description'])
94
- st.plotly_chart(fig, key = "figure 2")
95
-
 
96
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
97
  st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
98
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
99
-
100
  else:
101
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
102
-
103
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
104
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
105
-
106
 
107
 
108
 
 
41
 
42
 
43
 
44
+ st.subheader("Job Description", divider="red")
45
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text 1")
 
46
  job = pd.Series(txt, name="Text")
47
 
48
+ st.subheader("Candidate Profile 1", divider="green")
 
 
49
  if 'upload_count' not in st.session_state:
50
  st.session_state['upload_count'] = 0
51
 
52
  max_attempts = 3
 
53
  if st.session_state['upload_count'] < max_attempts:
54
  uploaded_files = st.file_uploader(
55
  "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
56
  )
57
+ if uploaded_files:
58
+ st.session_state['upload_count'] += 1
59
+ for uploaded_file in uploaded_files:
60
+ pdf_reader = PdfReader(uploaded_file)
61
+ text_data = ""
62
+ for page in pdf_reader.pages:
63
+ text_data += page.extract_text()
64
+ data = pd.Series(text_data, name='Text')
 
65
  frames = [job, data]
66
  result = pd.concat(frames)
67
  model = GLiNER.from_pretrained("urchade/gliner_base")
68
+ labels = ["person", "country", "organization", "role", "skills", "year"]
69
  entities = model.predict_entities(text_data, labels)
70
  df = pd.DataFrame(entities)
71
+
72
  st.title("Profile of candidate 1")
73
  fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
74
+ values='score', color='label')
75
+ fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
76
+ st.plotly_chart(fig, key="figure 1")
77
+
 
78
  vectorizer = TfidfVectorizer()
79
  tfidf_matrix = vectorizer.fit_transform(result)
80
  tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
81
  cosine_sim_matrix = cosine_similarity(tfidf_matrix)
82
  cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
83
+
84
  st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
85
+ fig = px.imshow(cosine_sim_df, text_auto=True,
86
+ labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
87
+ x=['Resume 1', 'Jon Description'],
88
+ y=['Resume 1', 'Job Description'])
89
+ st.plotly_chart(fig, key="figure 2")
90
+
91
  for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
92
  st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
93
+ st.info(
94
+ "A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
95
  else:
96
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts}).")
97
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
98
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
 
99
 
100
 
101