Muhammad Salman Akbar committed on
Commit eebc9f2 · verified · 1 Parent(s): b207451

Create app.py

Files changed (1)
  1. app.py +215 -0
app.py ADDED
@@ -0,0 +1,215 @@
import os
import re
from datetime import datetime

import PyPDF2
import torch
import gradio as gr
from docxtpl import DocxTemplate
from groq import Groq
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM

# Read the Groq API key from the environment (for example a Hugging Face Space
# secret) instead of hardcoding it in the source, where it would be exposed.
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# --- Resume Extraction Functions ---
def extract_text_from_pdf(pdf_file_path):
    """Extracts text from a PDF file."""
    with open(pdf_file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ''
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable text
            text += page.extract_text() or ''
    return text

def extract_text_from_txt(txt_file_path):
    """Extracts text from a .txt file."""
    with open(txt_file_path, 'r', encoding='utf-8') as txt_file:
        text = txt_file.read()
    return text

# --- Skill Extraction with Llama Model ---
def extract_skills_llama(text):
    """Extracts skills from the text using the Llama model via the Groq API."""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": (
                    "Extract the skills from the following text and return them "
                    f"as a single comma-separated list with no extra commentary: {text}"
                ),
            }
        ],
        model="llama3-70b-8192",  # Llama 3 70B served by Groq
    )
    # The prompt asks for a comma-separated list; split it and strip whitespace.
    raw_skills = chat_completion.choices[0].message.content
    skills = [skill.strip() for skill in raw_skills.split(',') if skill.strip()]
    return skills

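# Illustrative usage (not called anywhere in this app; the exact output depends on
# the Llama model's response):
#   extract_skills_llama("Built REST APIs in Python and deployed them with Docker on AWS")
#   might return something like ['Python', 'REST APIs', 'Docker', 'AWS'].
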
# --- Job Description Processing ---
def process_job_description(job_description_text):
    """Processes the job description text."""
    # 1. Preprocess the job description text
    job_description_text = preprocess_text(job_description_text)
    # 2. Extract skills from the job description using Llama
    job_description_skills = extract_skills_llama(job_description_text)
    return job_description_skills

# --- Text Preprocessing ---
def preprocess_text(text):
    """Preprocesses text for better analysis."""
    text = text.lower()                   # Convert to lowercase
    text = re.sub(r'[^\w\s]', '', text)   # Remove punctuation
    text = re.sub(r'\s+', ' ', text)      # Remove extra whitespace
    return text

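# Worked example (illustrative): preprocess_text("Senior ML Engineer -- PyTorch, SQL!")
# returns "senior ml engineer pytorch sql".
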
# --- Resume Similarity ---
def calculate_resume_similarity(resume_text, job_description_text):
    """Calculates the similarity between the resume and job description using a Hugging Face model."""
    model_name = "cross-encoder/stsb-roberta-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    inputs = tokenizer(resume_text, job_description_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # The cross-encoder produces a single relevance logit; squash it into (0, 1).
    similarity_score = torch.sigmoid(outputs.logits).item()
    return similarity_score

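# Illustrative sketch: the functions above and below reload their models from disk on
# every call, which is slow. A cached loader like this hypothetical helper could be
# reused instead (it is not wired into the existing functions):
from functools import lru_cache

@lru_cache(maxsize=4)
def _load_sequence_classifier(model_name):
    """Loads and caches a tokenizer/model pair for sequence classification."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
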
# --- Communication Generation ---
def communication_generator(message, max_length=100):
    """Generates a communication response based on the input message using a Hugging Face model."""
    model_name = "google/flan-t5-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=512)
    response = model.generate(**inputs, max_length=max_length, num_beams=4, early_stopping=True)
    generated_response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
    return generated_response + " We look forward to getting in touch with you soon!"

# --- Sentiment Analysis ---
def sentiment_model(text):
    """Analyzes the sentiment of the text using a Hugging Face model."""
    # SST-2 fine-tuned DistilBERT is a two-class (negative/positive) checkpoint.
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits).item()
    sentiment_labels = {0: "Negative", 1: "Positive"}
    return sentiment_labels[predicted_class]

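# Illustrative alternative: rather than hardcoding the label mapping, the checkpoint's
# own id2label config can be used, which stays correct if the model is swapped out.
def sentiment_label_from_config(model, predicted_class):
    """Looks up the human-readable label for a class index from the model config."""
    return model.config.id2label.get(predicted_class, str(predicted_class))
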
# --- Placeholder Functions for Enhancement ---
def enhance_resume(resume_text):
    """Placeholder function for enhancing the resume (you can implement your own logic here)."""
    return resume_text

def enhance_job_description(job_description_text):
    """Placeholder function for enhancing the job description (you can implement your own logic here)."""
    return job_description_text

# --- Resume Analysis Function ---
def analyze_resume(resume_file, job_description_file):
    """Analyzes the resume and job description."""
    # On an invalid upload, return the error message in the first output slot and
    # blanks for the rest, so the result always matches the seven Gradio outputs.
    if resume_file.name.endswith(('.pdf', '.txt')):
        if resume_file.name.endswith('.pdf'):
            resume_text = extract_text_from_pdf(resume_file.name)
        else:
            resume_text = extract_text_from_txt(resume_file.name)
    else:
        return ("Invalid file type. Please upload a PDF or TXT file for the resume.",
                "", "", "", "", "", "")

    if job_description_file.name.endswith('.txt'):
        job_description_text = extract_text_from_txt(job_description_file.name)
    else:
        return ("Invalid file type. Please upload a TXT file for the job description.",
                "", "", "", "", "", "")

    job_description_skills = process_job_description(job_description_text)
    resume_skills = extract_skills_llama(resume_text)
    similarity_score = calculate_resume_similarity(resume_text, job_description_text)
    communication_response = communication_generator(
        f"I am reviewing a resume for a {job_description_text} position. "
        f"The candidate has the following skills: {', '.join(resume_skills)}"
    )
    sentiment = sentiment_model(resume_text)
    enhanced_resume = enhance_resume(resume_text)
    enhanced_job_description = enhance_job_description(job_description_text)

    # One value per Gradio output component, in the same order as the outputs list.
    return (
        f"**Similarity Score:** {similarity_score:.2f}",
        f"**Communication Response:** {communication_response}",
        f"**Sentiment:** {sentiment}",
        f"**Resume Skills:** {', '.join(resume_skills)}",
        f"**Job Description Skills:** {', '.join(job_description_skills)}",
        f"**Enhanced Resume:**\n{enhanced_resume}",
        f"**Enhanced Job Description:**\n{enhanced_job_description}",
    )

# --- Offer Letter Generation ---
def generate_offer_letter(template_file, candidate_name, role, start_date, hours):
    """Generates an offer letter."""
    # Parse the start date string
    try:
        start_date = datetime.strptime(start_date, "%Y-%m-%d").strftime("%B %d, %Y")  # Format for the template
    except ValueError:
        # Surface the problem in the UI; the output component expects a file path.
        raise gr.Error("Invalid date format. Please use YYYY-MM-DD.")

    # Define the context variables
    context = {
        'candidate_name': candidate_name,
        'role': role,
        'start_date': start_date,
        'hours': hours,
    }

    # Load the template document and render it with the context variables
    tpl = DocxTemplate(template_file.name)
    tpl.render(context)

    # Save the generated document next to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    docx_file_path = os.path.join(script_dir, f"{candidate_name}_offer_letter.docx")
    tpl.save(docx_file_path)

    # Return the file path; the gr.File output accepts a path to the generated file.
    return docx_file_path

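# Note on the template (illustrative): docxtpl renders Jinja2 placeholders, so the
# uploaded .docx is expected to contain tags matching the context keys above,
# e.g. {{ candidate_name }}, {{ role }}, {{ start_date }} and {{ hours }}.
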
# --- Gradio Interface ---
resume_demo = gr.Interface(
    fn=analyze_resume,
    inputs=[
        gr.File(label="Upload Resume (PDF or TXT)"),
        gr.File(label="Upload Job Description (TXT)"),
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Communication Response"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Resume Skills"),
        gr.Textbox(label="Job Description Skills"),
        gr.Textbox(label="Enhanced Resume", lines=20),
        gr.Textbox(label="Enhanced Job Description", lines=10),
    ],
    title="Resume and Job Description Analyzer",
    description="Upload your resume (PDF or TXT) and job description (TXT) to analyze their similarity, extract skills, and generate a communication response.",
)

offer_demo = gr.Interface(
    fn=generate_offer_letter,
    inputs=[
        gr.File(label="Upload Offer Letter Template (DOCX)"),
        gr.Textbox(label="Candidate Name"),
        gr.Textbox(label="Role"),
        gr.Textbox(label="Start Date (YYYY-MM-DD)"),  # Plain textbox for the date string
        gr.Number(label="Hours per Week"),
    ],
    outputs=gr.File(label="Offer Letter"),
    title="Offer Letter Generator",
    description="Upload an offer letter template and enter candidate information to generate an offer letter.",
)

# Combine the two interfaces into a tabbed app
demo = gr.TabbedInterface(
    [resume_demo, offer_demo],
    ["Resume Analyzer", "Offer Letter Generator"],
    title="HR Assistant",
)

if __name__ == '__main__':
    # share=True creates a temporary public link when running locally; it is not
    # needed when the app is hosted on Hugging Face Spaces.
    demo.launch(share=True)