nlpblogs committed on
Commit
319dddf
·
verified ·
1 Parent(s): a582cba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -12
app.py CHANGED
@@ -6,20 +6,32 @@ from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  uploaded_files = st.file_uploader(
9
- "Choose a CSV file", accept_multiple_files=True
10
  )
 
 
 
11
  for uploaded_file in uploaded_files:
12
- pdf_reader = PdfReader(uploaded_file) # read your PDF file
13
- # extract the text data from your PDF file after looping through its pages with the .extract_text() method
14
- text_data= ""
15
- for page in pdf_reader.pages: # for loop method
16
- text_data+= page.extract_text()
17
-
18
-
19
-
20
- data = pd.Series(text_data, index = ["Resume"])
21
- st.dataframe(data) # view the text data
22
-
 
 
 
 
 
 
 
 
 
23
 
24
 
25
 
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  uploaded_files = st.file_uploader(
9
+ "Choose a PDF file(s)", accept_multiple_files=True, type=["pdf"] # Corrected label and added type
10
  )
11
+
12
+ all_series = [] # Initialize an empty list to store Pandas Series
13
+
14
  for uploaded_file in uploaded_files:
15
+ try:
16
+ pdf_reader = PdfReader(uploaded_file)
17
+ text_data = ""
18
+ for page in pdf_reader.pages:
19
+ text_data += page.extract_text()
20
+
21
+ # Create a Pandas Series for each PDF
22
+ data = pd.Series({"Resume": text_data}) # Use a dictionary to name the Series
23
+ all_series.append(data)
24
+ st.subheader(f"Content of {uploaded_file.name}")
25
+ st.dataframe(data) # Display the Series for the current PDF
26
+
27
+ except Exception as e:
28
+ st.error(f"Error processing {uploaded_file.name}: {e}")
29
+
30
+ if all_series:
31
+ # Concatenate all the Series into a DataFrame
32
+ all_data_df = pd.DataFrame(all_series)
33
+ st.subheader("Combined Resume Texts")
34
+ st.dataframe(all_data_df)
35
 
36
 
37