billusanda007 committed on
Commit
524d0a6
·
verified ·
1 Parent(s): c8db677

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -1,7 +1,8 @@
 
1
  import pandas as pd
2
  import numpy as np
3
  import re
4
- import pickle
5
  import pdfminer
6
  from pdfminer.high_level import extract_text
7
  import pytesseract
@@ -28,7 +29,15 @@ def pdf_to_text(file):
28
  text = "\n".join([pytesseract.image_to_string(img) for img in images])
29
  return text
30
 
 
 
 
 
 
 
 
31
  def load_deeprank_model():
 
32
  return load_model('deeprank_model_v2.h5')
33
 
34
  def predict_category(resumes_data, selected_category, max_sequence_length, model, tokenizer, label):
@@ -47,6 +56,9 @@ def predict_category(resumes_data, selected_category, max_sequence_length, model
47
  return ranks
48
 
49
  def main():
 
 
 
50
  model = load_deeprank_model()
51
  df = pd.read_csv('UpdatedResumeDataSet.csv')
52
  df['cleaned'] = df['Resume'].apply(cleanResume)
@@ -56,24 +68,22 @@ def main():
56
  text = df['cleaned'].values
57
  tokenizer = Tokenizer()
58
  tokenizer.fit_on_texts(text)
59
- vocab_size = len(tokenizer.word_index) + 1
60
- num_classes = len(label.classes_)
61
  max_sequence_length = 500
62
 
63
- resumes_data = []
64
- files = input("Enter the paths of resumes (comma-separated): ").split(',')
65
- for file in files:
66
- text = cleanResume(pdf_to_text(file.strip()))
67
- resumes_data.append({'ResumeText': text, 'FileName': file.strip()})
68
-
69
- print("Available categories:", list(label.classes_))
70
- selected_category = input("Select a category to rank by: ")
71
-
72
- if not resumes_data or selected_category not in label.classes_:
73
- print("Error: Invalid input. Please provide valid resumes and select a valid category.")
74
- else:
75
- ranks = predict_category(resumes_data, selected_category, max_sequence_length, model, tokenizer, label)
76
- print(pd.DataFrame(ranks))
77
 
78
  if __name__ == '__main__':
79
  main()
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
  import re
5
+ import h5py
6
  import pdfminer
7
  from pdfminer.high_level import extract_text
8
  import pytesseract
 
29
  text = "\n".join([pytesseract.image_to_string(img) for img in images])
30
  return text
31
 
32
def _drop_false_flag(node, key):
    """Recursively delete ``key`` from every dict in ``node`` where its value is ``False``.

    Returns True if at least one entry was removed, so the caller can skip
    rewriting the file when nothing changed.
    """
    if isinstance(node, dict):
        removed = False
        if node.get(key) is False:
            del node[key]
            removed = True
        children = list(node.values())
    elif isinstance(node, list):
        removed = False
        children = node
    else:
        return False
    for child in children:
        if _drop_false_flag(child, key):
            removed = True
    return removed


def fix_h5_model(model_path="deeprank_model_v2.h5"):
    """Remove ``"time_major": false`` entries from the model config stored in an HDF5 file.

    The file's ``model_config`` attribute is parsed as JSON, every
    ``time_major`` key whose value is ``false`` is dropped, and the attribute
    is written back in place. Editing the parsed structure (instead of doing a
    raw substring replace) keeps the JSON valid: a textual replace of
    ``"time_major": false`` leaves the separating comma behind.

    NOTE(review): presumably this is needed because the Keras version used for
    loading rejects the ``time_major`` argument -- confirm against the
    deployment environment.

    Args:
        model_path: Path of the ``.h5`` file to patch in place. Defaults to
            the model file the app loads.
    """
    import json  # local import so the block does not depend on file-level imports

    with h5py.File(model_path, "r+") as f:
        if "model_config" not in f.attrs:
            return

        raw_config = f.attrs["model_config"]
        # Depending on the h5py version and how the attribute was stored, the
        # value comes back as bytes or as str; handle both and write back the
        # same kind that was read.
        was_bytes = isinstance(raw_config, bytes)
        config_text = raw_config.decode("utf-8") if was_bytes else str(raw_config)

        config = json.loads(config_text)
        if not _drop_false_flag(config, "time_major"):
            # Nothing to strip (e.g. the file was already patched): leave the
            # attribute untouched.
            return

        # json.dumps emits ASCII-only text by default, so encoding is lossless.
        updated_text = json.dumps(config)
        updated_config = updated_text.encode("utf-8") if was_bytes else updated_text
        f.attrs.modify("model_config", updated_config)


def load_deeprank_model():
    """Patch the saved model file's config, then load and return the model.

    Calls ``fix_h5_model()`` on the default model file before handing the same
    path to ``load_model``.
    """
    fix_h5_model()
    return load_model('deeprank_model_v2.h5')
42
 
43
  def predict_category(resumes_data, selected_category, max_sequence_length, model, tokenizer, label):
 
56
  return ranks
57
 
58
  def main():
59
+ st.title("Resume Ranking App")
60
+ st.write("Upload resumes and select a category to rank them based on their relevance.")
61
+
62
  model = load_deeprank_model()
63
  df = pd.read_csv('UpdatedResumeDataSet.csv')
64
  df['cleaned'] = df['Resume'].apply(cleanResume)
 
68
  text = df['cleaned'].values
69
  tokenizer = Tokenizer()
70
  tokenizer.fit_on_texts(text)
 
 
71
  max_sequence_length = 500
72
 
73
+ uploaded_files = st.file_uploader("Upload Resumes (PDFs)", type=["pdf"], accept_multiple_files=True)
74
+ if uploaded_files:
75
+ resumes_data = []
76
+ for file in uploaded_files:
77
+ text = cleanResume(pdf_to_text(file))
78
+ resumes_data.append({'ResumeText': text, 'FileName': file.name})
79
+
80
+ selected_category = st.selectbox("Select a category to rank by", list(label.classes_))
81
+ if st.button("Rank Resumes"):
82
+ if resumes_data and selected_category:
83
+ ranks = predict_category(resumes_data, selected_category, max_sequence_length, model, tokenizer, label)
84
+ st.write(pd.DataFrame(ranks))
85
+ else:
86
+ st.error("Please upload valid resumes and select a valid category.")
87
 
88
  if __name__ == '__main__':
89
  main()