nlpblogs committed on
Commit
7b149ac
·
verified ·
1 Parent(s): 9f7e85b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -48
app.py CHANGED
@@ -41,12 +41,12 @@ with st.sidebar:
41
 
42
 
43
 
44
- st.subheader ("Job Description", divider = "orange")
45
 
46
  txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
47
  job = pd.Series(txt, name="Text")
48
 
49
- st.subheader("Candidate Profile 1", divider = "red")
50
 
51
 
52
  if 'upload_count' not in st.session_state:
@@ -61,58 +61,47 @@ if st.session_state['upload_count'] < max_attempts:
61
 
62
  if uploaded_files:
63
  st.session_state['upload_count'] += 1
64
- for uploaded_file in uploaded_files:
65
- pdf_reader = PdfReader(uploaded_file)
66
- text_data = ""
67
- for page in pdf_reader.pages:
68
- text_data += page.extract_text()
69
- data = pd.Series(text_data, name = 'Text')
70
-
71
-
72
- frames = [job, data]
73
- result = pd.concat(frames)
74
-
75
-
76
- model = GLiNER.from_pretrained("urchade/gliner_base")
77
- labels = ["person", "country","organization", "role", "skills", "year"]
78
- entities = model.predict_entities(text_data, labels)
79
- df = pd.DataFrame(entities)
80
-
81
-
82
- st.title("Profile of candidate 1")
83
- fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
84
  values='score', color='label')
85
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
86
- st.plotly_chart(fig, key = "figure 1")
87
 
88
 
 
 
 
 
 
89
 
90
-
91
- vectorizer = TfidfVectorizer()
92
- tfidf_matrix = vectorizer.fit_transform(result)
93
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
94
-
95
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
96
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
97
-
98
-
99
-
100
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
101
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
102
  x=['Resume 1', 'Jon Description'],
103
  y=['Resume 1', 'Job Description'])
104
- st.plotly_chart(fig, key = "figure 2")
105
-
106
-
107
-
108
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
109
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
110
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
111
 
112
  else:
113
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
114
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
115
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
 
116
 
117
 
118
 
@@ -122,9 +111,7 @@ else:
122
 
123
 
124
 
125
- # fix else: warning
126
- # fix, keys
127
- # st.title
128
 
129
 
130
 
 
41
 
42
 
43
 
44
+ st.subheader ("Job Description", divider = "red")
45
 
46
  txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
47
  job = pd.Series(txt, name="Text")
48
 
49
+ st.subheader("Candidate Profile 1", divider = "green")
50
 
51
 
52
  if 'upload_count' not in st.session_state:
 
61
 
62
  if uploaded_files:
63
  st.session_state['upload_count'] += 1
64
+ for uploaded_file in uploaded_files:
65
+ pdf_reader = PdfReader(uploaded_file)
66
+ text_data = ""
67
+ for page in pdf_reader.pages:
68
+ text_data += page.extract_text()
69
+ data = pd.Series(text_data, name = 'Text')
70
+ frames = [job, data]
71
+ result = pd.concat(frames)
72
+ model = GLiNER.from_pretrained("urchade/gliner_base")
73
+ labels = ["person", "country","organization", "role", "skills", "year"]
74
+ entities = model.predict_entities(text_data, labels)
75
+ df = pd.DataFrame(entities)
76
+
77
+ st.title("Profile of candidate 1")
78
+ fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
 
 
 
 
 
79
  values='score', color='label')
80
+ fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
81
+ st.plotly_chart(fig, key = "figure 1")
82
 
83
 
84
+ vectorizer = TfidfVectorizer()
85
+ tfidf_matrix = vectorizer.fit_transform(result)
86
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
87
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
88
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
89
 
90
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
91
+ fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
 
 
 
 
 
 
 
 
 
 
92
  x=['Resume 1', 'Jon Description'],
93
  y=['Resume 1', 'Job Description'])
94
+ st.plotly_chart(fig, key = "figure 2")
95
+
96
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
97
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
98
+ st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
99
 
100
  else:
101
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
102
+
103
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
104
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
105
 
106
 
107
 
 
111
 
112
 
113
 
114
+
 
 
115
 
116
 
117