nlpblogs committed on
Commit
20bdb07
·
verified ·
1 Parent(s): a245af0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -31
app.py CHANGED
@@ -18,54 +18,47 @@ import pandas as pd
18
  from PyPDF2 import PdfReader
19
  from gliner import GLiNER
20
 
21
- txt = st.text_area("Job description")
22
- data1 = pd.Series(txt, name = "Text")
23
- st.dataframe(data1)
24
-
25
 
 
 
 
26
 
27
  uploaded_files = st.file_uploader(
28
- "Choose a PDF file(s) and job description as pdf", accept_multiple_files=True, type="pdf"
29
  )
30
 
 
 
31
  if uploaded_files:
32
- all_data = [] # Store dictionaries of text and entities for each PDF
33
  for uploaded_file in uploaded_files:
34
  try:
35
  pdf_reader = PdfReader(uploaded_file)
36
  text_data = ""
37
  for page in pdf_reader.pages:
38
  text_data += page.extract_text()
 
 
 
39
 
40
- model = GLiNER.from_pretrained("urchade/gliner_base")
41
- labels = ["person", "country", "organization", "time", "role"]
42
- entities = model.predict_entities(text_data, labels)
43
 
44
- entity_dict = {}
45
- for label in labels:
46
- entity_dict[label] = [entity["text"] for entity in entities if entity["label"] == label]
47
 
48
- data = {"Text": text_data, **entity_dict}
49
- all_data.append(data)
 
50
 
51
- except Exception as e:
52
- st.error(f"Error processing file {uploaded_file.name}: {e}")
 
 
53
 
54
- if all_data:
55
- df = pd.DataFrame(all_data)
56
- frames = [data1, df]
57
- result = pd.concat(frames)
58
- st.dataframe(result)
59
-
60
- y = result[['Text']]
61
- st.dataframe(y)
62
-
63
- vec = TfidfVectorizer()
64
- tf_idf = vec.fit_transform(y)
65
- x = pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out())
66
- st.dataframe(x)
67
- cosine_sim = cosine_similarity(tf_idf, tf_idf)
68
- st.dataframe(cosine_sim)
69
 
70
 
71
 
 
18
  from PyPDF2 import PdfReader
19
  from gliner import GLiNER
20
 
 
 
 
 
21
 
22
# --- Job description vs. candidate profile similarity -----------------------
# Reads a free-text job description and one or more candidate PDFs, builds a
# TF-IDF representation of all documents, and reports the cosine similarity
# between the job description (document 0) and every candidate profile.

txt = st.text_area("Job description")
job_description_series = pd.Series([txt], name="Text")
st.dataframe(job_description_series)

uploaded_files = st.file_uploader(
    "Choose a PDF file(s) for candidate profiles", accept_multiple_files=True, type="pdf"
)

all_resumes_text = []  # Store the text content of each PDF
resume_labels = []  # Display label for each entry in all_resumes_text

if uploaded_files:
    # Number candidates by upload position so that a file that fails to parse
    # does not shift the numbering of the files that follow it.
    for position, uploaded_file in enumerate(uploaded_files, start=1):
        try:
            pdf_reader = PdfReader(uploaded_file)
            # Pages without a text layer may yield an empty/None result;
            # treat those as empty strings instead of failing the whole file.
            text_data = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        except Exception as e:
            # Top-level UI boundary: report the problem and keep processing
            # the remaining uploads.
            st.error(f"Error processing file {uploaded_file.name}: {e}")
            continue

        if not text_data.strip():
            # An empty document would only contribute an all-zero TF-IDF row.
            st.warning(f"No extractable text found in {uploaded_file.name}; skipping it.")
            continue

        all_resumes_text.append(text_data)
        resume_labels.append(f"Candidate Profile {position} ({uploaded_file.name})")

if all_resumes_text:
    if not txt.strip():
        # Without a job description every similarity score would be 0.
        st.warning("Enter a job description to compute similarity scores.")
    else:
        all_documents = [txt] + all_resumes_text
        document_labels = ["Job description"] + resume_labels

        vectorizer = TfidfVectorizer()
        try:
            tfidf_matrix = vectorizer.fit_transform(all_documents)
        except ValueError as e:
            # Raised by scikit-learn when no usable tokens are found
            # (empty vocabulary).
            st.error(f"Could not compute TF-IDF values: {e}")
        else:
            tfidf_df = pd.DataFrame(
                tfidf_matrix.toarray(),
                columns=vectorizer.get_feature_names_out(),
                index=document_labels,
            )
            st.subheader("TF-IDF Values:")
            st.dataframe(tfidf_df)

            cosine_sim_matrix = cosine_similarity(tfidf_matrix)
            cosine_sim_df = pd.DataFrame(
                cosine_sim_matrix, index=document_labels, columns=document_labels
            )
            st.subheader("Cosine Similarity Matrix:")
            st.dataframe(cosine_sim_df)

            # Display similarity scores between the job description and each resume.
            # Row 0 is the job description; columns 1.. are the candidate profiles.
            st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
            for label, similarity_score in zip(resume_labels, cosine_sim_matrix[0][1:]):
                st.write(f"Similarity with {label}: {similarity_score:.4f}")
 
 
 
 
 
 
 
 
 
 
 
62
 
63
 
64