Spaces:

jacob-c
/

Resume_Screener_and_Skill_Extractor

Restarting on L4

App Files Files Community

root committed 13 days ago

Commit

72d33a9

1 Parent(s): eaa3094

ss

Browse files

Files changed (2) hide show

app.py +384 -570
requirements.txt +18 -20

app.py CHANGED Viewed

@@ -1,89 +1,131 @@
 import streamlit as st
 import pdfplumber
-import io
-import spacy
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-import subprocess
-import sys
-import torch
 import re
 import pandas as pd
-import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-from datetime import datetime
-import dateparser
-from sentence_transformers import SentenceTransformer
-import nltk
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
-from sklearn.metrics.pairwise import cosine_similarity
-import faiss
-import requests
-from bs4 import BeautifulSoup
-import networkx as nx
-import Levenshtein
-import json
-import matplotlib.pyplot as plt
-from io import BytesIO
-import base64
-from sentence_transformers import util
-# Download NLTK resources
-@st.cache_resource
-def download_nltk_resources():
-    nltk.download('punkt')
-    nltk.download('stopwords')
-    nltk.download('wordnet')
-    nltk.download('averaged_perceptron_tagger')
-download_nltk_resources()
 st.set_page_config(
     page_title="Resume Screener & Skill Extractor",
     page_icon="📄",
     layout="wide"
 )
-# Download spaCy model if not already downloaded
-@st.cache_resource
-def download_spacy_model():
     try:
-        nlp = spacy.load("en_core_web_sm")
-    except OSError:
-        subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
-        nlp = spacy.load("en_core_web_sm")
-    return nlp
-# Load the NLP models
 @st.cache_resource
 def load_models():
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    nlp = download_spacy_model()
-    # Load sentence transformer model for semantic matching
-    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
-    # Load Qwen3-8B model for career advice
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
-        qwen_model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen3-8B",
-            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-            device_map="auto"
-        )
-    except Exception as e:
-        st.error(f"Failed to load Qwen3-8B model: {str(e)}")
-        qwen_tokenizer = None
-        qwen_model = None
-    return summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model
-# Initialize models
-summarizer, nlp, qwen_tokenizer, qwen_model, sentence_model = load_models()
-# Job descriptions and required skills
 job_descriptions = {
     "Software Engineer": {
         "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
@@ -120,137 +162,272 @@ job_descriptions = {
             "Mid-level": "3-5 years of experience, model development, feature engineering",
             "Senior": "6+ years of experience, advanced ML techniques, research experience"
         }
-    },
-    "Product Manager": {
-        "skills": ["product strategy", "roadmap planning", "user stories", "agile", "market research",
-                   "stakeholder management", "analytics", "user experience", "a/b testing", "prioritization"],
-        "description": "Seeking product managers who can drive product vision, strategy, and execution.",
-        "must_have": ["product strategy", "roadmap planning", "stakeholder management"],
-        "nice_to_have": ["agile", "analytics", "a/b testing"],
-        "seniority_levels": {
-            "Junior": "0-2 years of experience, assisting with feature definition and user stories",
-            "Mid-level": "3-5 years of experience, owning products/features, market research",
-            "Senior": "6+ years of experience, defining product vision, managing teams, strategic planning"
-        }
-    },
-    "DevOps Engineer": {
-        "skills": ["linux", "aws", "docker", "kubernetes", "ci/cd", "terraform",
-                   "ansible", "monitoring", "scripting", "automation", "security"],
-        "description": "Looking for DevOps engineers to build and maintain infrastructure and deployment pipelines.",
-        "must_have": ["linux", "docker", "ci/cd"],
-        "nice_to_have": ["kubernetes", "terraform", "aws"],
-        "seniority_levels": {
-            "Junior": "0-2 years of experience, basic system administration, scripting",
-            "Mid-level": "3-5 years of experience, container orchestration, infrastructure as code",
-            "Senior": "6+ years of experience, architecture design, security, team leadership"
-        }
     }
 }
 def extract_text_from_pdf(pdf_file):
     text = ""
-    with pdfplumber.open(pdf_file) as pdf:
-        for page in pdf.pages:
-            text += page.extract_text() or ""
     return text
-def analyze_resume(text, job_title):
-    # Extract relevant skills
-    doc = nlp(text.lower())
     found_skills = []
     required_skills = job_descriptions[job_title]["skills"]
     for skill in required_skills:
-        if skill in text.lower():
             found_skills.append(skill)
     # Generate summary
-    chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
-    summaries = []
-    for chunk in chunks[:3]:  # Process first 3000 characters to avoid token limits
-        summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
-        summaries.append(summary)
-    # Extract experience timeline
     experiences = extract_experience(text)
     # Calculate semantic match score
-    match_score = semantic_matching(text, job_title)
-    # Estimate seniority
-    seniority, years_experience, leadership_count, must_have_percentage = estimate_seniority(experiences, found_skills, job_title)
-    # Extract skill levels
-    skill_levels = extract_skill_levels(text, found_skills)
-    # Check for timeline inconsistencies
-    inconsistencies = check_timeline_inconsistencies(experiences)
-    # Verify companies
-    company_verification = verify_companies(experiences)
-    # Predict career trajectory
-    career_prediction = predict_career_trajectory(experiences, seniority, job_title)
     return {
         'found_skills': found_skills,
-        'summary': " ".join(summaries),
         'experiences': experiences,
         'match_score': match_score,
         'seniority': seniority,
-        'years_experience': years_experience,
-        'skill_levels': skill_levels,
         'inconsistencies': inconsistencies,
-        'company_verification': company_verification,
         'career_prediction': career_prediction
     }
 def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
-    if qwen_model is None or qwen_tokenizer is None:
-        return "Career advice model not available. Please check the model installation."
-    # Create a prompt for the model
-    prompt = f"""
-You are a professional career advisor. Based on the resume and the target job position,
-provide personalized advice on skills to develop and suggest projects that would help the candidate
-become a better fit for the position.
-Resume summary: {resume_text[:1000]}...
-Target position: {job_title}
-Job requirements: {job_descriptions[job_title]['description']}
-Skills the candidate has: {', '.join(found_skills)}
-Skills the candidate needs to develop: {', '.join(missing_skills)}
-Provide the following:
-1. Specific advice on how to develop the missing skills
-2. 3-5 project ideas that would showcase these skills
-3. Resources for learning (courses, books, websites)
 """
-    # Generate advice using Qwen3-8B
-    try:
-        inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
-        with torch.no_grad():
-            outputs = qwen_model.generate(
-                **inputs,
-                max_new_tokens=1024,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True
-            )
-        advice = qwen_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
-        return advice
-    except Exception as e:
-        return f"Failed to generate career advice: {str(e)}"
-# Streamlit UI
-st.title("📄 Resume Screener & Skill Extractor")
-# Add description
 st.markdown("""
 This app helps recruiters analyze resumes by:
 - Extracting relevant skills for specific job positions
@@ -283,38 +460,36 @@ if uploaded_file and job_title:
             text = extract_text_from_pdf(uploaded_file)
             # Analyze resume
-            resume_data = analyze_resume(text, job_title)
             # Calculate missing skills
             missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
-                            if skill not in resume_data['found_skills']]
         # Display results in tabs
-        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
             "📊 Skills Match",
             "📝 Resume Summary",
             "🎯 Skills Gap",
-            "👨‍💼 Career Path",
-            "🔍 Authentication",
             "🚀 Career Advice"
         ])
         with tab1:
-            # First create columns for skill match percentage and semantic match
             col1, col2 = st.columns(2)
             with col1:
                 # Display matched skills
                 st.subheader("🎯 Matched Skills")
-                if resume_data['found_skills']:
-                    for skill in resume_data['found_skills']:
                         # Show skill with proficiency level
-                        level = resume_data['skill_levels'].get(skill, 'intermediate')
                         level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
                         st.success(f"{level_emoji} {skill.title()} ({level.title()})")
                     # Calculate match percentage
-                    match_percentage = len(resume_data['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
                     st.metric("Skills Match", f"{match_percentage:.1f}%")
                 else:
                     st.warning("No direct skill matches found.")
@@ -322,11 +497,11 @@ if uploaded_file and job_title:
             with col2:
                 # Display semantic match score
                 st.subheader("💡 Semantic Match")
-                st.metric("Overall Match Score", f"{resume_data['match_score']:.1f}%")
                 # Display must-have skills match
                 must_have_skills = job_descriptions[job_title]["must_have"]
-                must_have_count = sum(1 for skill in must_have_skills if skill in resume_data['found_skills'])
                 must_have_percentage = (must_have_count / len(must_have_skills)) * 100
                 st.write("Must-have skills:")
@@ -335,20 +510,20 @@ if uploaded_file and job_title:
                 # Professional level assessment
                 st.subheader("🧠 Seniority Assessment")
-                st.info(f"**{resume_data['seniority']}** ({resume_data['years_experience']:.1f} years equivalent experience)")
-                st.write(job_descriptions[job_title]["seniority_levels"][resume_data['seniority']])
         with tab2:
             # Display resume summary
             st.subheader("📝 Resume Summary")
-            st.write(resume_data['summary'])
             # Display experience timeline
             st.subheader("⏳ Experience Timeline")
-            if resume_data['experiences']:
                 # Convert experiences to dataframe for display
                 exp_data = []
-                for exp in resume_data['experiences']:
                     if 'start_date' in exp and 'end_date' in exp:
                         exp_data.append({
                             'Company': exp['company'],
@@ -369,33 +544,36 @@ if uploaded_file and job_title:
                     st.dataframe(exp_df)
                     # Create a timeline visualization if dates are available
-                    timeline_data = [exp for exp in resume_data['experiences'] if 'start_date' in exp and 'end_date' in exp]
-                    if timeline_data:
-                        # Sort by start date
-                        timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])
-                        # Create figure
-                        fig = go.Figure()
-                        for i, exp in enumerate(timeline_data):
-                            fig.add_trace(go.Bar(
-                                x=[(exp['end_date'] - exp['start_date']).days / 30],  # Duration in months
-                                y=[exp['company']],
-                                orientation='h',
-                                name=exp['role'],
-                                hovertext=f"{exp['role']} at {exp['company']}<br>{exp['start_date'].strftime('%b %Y')} - {exp['end_date'].strftime('%b %Y') if exp['end_date'] != datetime.now() else 'Present'}<br>Duration: {exp.get('duration_months', 0)} months",
-                                marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
-                            ))
-                        fig.update_layout(
-                            title="Career Timeline",
-                            xaxis_title="Duration (months)",
-                            yaxis_title="Company",
-                            height=400,
-                            margin=dict(l=0, r=0, b=0, t=30)
-                        )
-                        st.plotly_chart(fig, use_container_width=True)
             else:
                 st.warning("No work experience data could be extracted.")
@@ -420,7 +598,7 @@ if uploaded_file and job_title:
                 # Show must-have skills that are missing
                 missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
-                                   if skill not in resume_data['found_skills']]
                 if missing_must_have:
                     st.error("**Critical Skills Missing:**")
@@ -433,7 +611,7 @@ if uploaded_file and job_title:
                 # Show nice-to-have skills gap
                 missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
-                                      if skill not in resume_data['found_skills']]
                 if missing_nice_to_have:
                     st.warning("**Nice-to-Have Skills Missing:**")
@@ -441,388 +619,24 @@ if uploaded_file and job_title:
                         st.write(f"- {skill.title()}")
                 else:
                     st.success("Candidate has all the nice-to-have skills!")
-        with tab4:
-            # Display career path insights
-            st.subheader("👨‍💼 Career Trajectory")
-            # Show career prediction
-            st.info(resume_data['career_prediction'])
-            # Show experience trends
-            st.subheader("📈 Experience Analysis")
-            # Check for job hopping
-            if len(resume_data['experiences']) >= 3:
-                # Calculate average job duration
-                durations = [exp.get('duration_months', 0) for exp in resume_data['experiences']
-                            if 'duration_months' in exp]
-                if durations:
-                    avg_duration = sum(durations) / len(durations)
-                    if avg_duration < 12:
-                        st.warning(f"🚩 **Frequent Job Changes**: Average job duration is only {avg_duration:.1f} months")
-                    elif avg_duration < 24:
-                        st.warning(f"⚠️ **Moderate Job Hopping**: Average job duration is {avg_duration:.1f} months")
-                    else:
-                        st.success(f"✅ **Stable Employment**: Average job duration is {avg_duration:.1f} months")
-            # Show inconsistencies if any
-            if resume_data['inconsistencies']:
-                st.subheader("⚠️ Timeline Inconsistencies")
-                for issue in resume_data['inconsistencies']:
-                    if issue['type'] == 'overlap':
-                        st.warning(issue['description'])
-                    elif issue['type'] == 'gap':
-                        st.info(issue['description'])
-        with tab5:
-            # Display authentication signals
-            st.subheader("🔍 Resume Authentication")
-            # Company verification results
-            st.write("**Company Verification Results:**")
-            if resume_data['company_verification']:
-                # Count suspicious companies
-                suspicious_count = sum(1 for v in resume_data['company_verification']
-                                    if v['status'] == 'suspicious')
-                if suspicious_count == 0:
-                    st.success("✅ All companies mentioned in the resume passed basic verification")
-                else:
-                    st.warning(f"⚠️ {suspicious_count} companies require further verification")
-                # Display verification details
-                verification_data = [{
-                    'Company': v['company'],
-                    'Status': v['status'].title(),
-                    'Notes': v['reason']
-                } for v in resume_data['company_verification']]
-                st.dataframe(pd.DataFrame(verification_data))
-            else:
-                st.info("No company information found for verification.")
-            # Timeline consistency check
-            st.write("**Timeline Consistency Check:**")
-            if not resume_data['inconsistencies']:
-                st.success("✅ No timeline inconsistencies detected")
-            else:
-                st.warning(f"⚠️ {len(resume_data['inconsistencies'])} timeline inconsistencies found")
-                for issue in resume_data['inconsistencies']:
-                    st.write(f"- {issue['description']}")
-        with tab6:
             # Display career advice
             st.subheader("🚀 Career Advice and Project Recommendations")
             if st.button("Generate Career Advice"):
                 with st.spinner("Generating personalized career advice..."):
-                    advice = generate_career_advice(text, job_title, resume_data['found_skills'], missing_skills)
                     st.markdown(advice)
     except Exception as e:
         st.error(f"An error occurred while processing the resume: {str(e)}")
 # Add footer
 st.markdown("---")
-st.markdown("Made with ❤️ using Streamlit and Hugging Face")
-# Semantic matching between resume and job description
-def semantic_matching(resume_text, job_title):
-    job_desc = job_descriptions[job_title]["description"]
-    # Encode texts using sentence transformers
-    resume_embedding = sentence_model.encode(resume_text, convert_to_tensor=True)
-    job_embedding = sentence_model.encode(job_desc, convert_to_tensor=True)
-    # Calculate cosine similarity
-    cos_sim = cosine_similarity(
-        resume_embedding.cpu().numpy().reshape(1, -1),
-        job_embedding.cpu().numpy().reshape(1, -1)
-    )[0][0]
-    return cos_sim * 100  # Convert to percentage
-# Extract experience timeline from resume
-def extract_experience(text):
-    # Pattern to find work experience entries
-    # Look for patterns like "Company Name | Role | Jan 2020 - Present"
-    exp_pattern = r"(?i)(.*?(?:inc|llc|ltd|company|corp|corporation|group)?)\s*(?:[|•-]\s*)?(.*?)(?:[|•-]\s*)((?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}\s*(?:-|to|–)\s*(?:(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\w\s,]*\d{4}|present))"
-    experiences = []
-    for match in re.finditer(exp_pattern, text, re.IGNORECASE):
-        company = match.group(1).strip()
-        role = match.group(2).strip()
-        duration = match.group(3).strip()
-        # Parse dates
-        try:
-            date_range = duration.split('-') if '-' in duration else duration.split('to') if 'to' in duration else duration.split('–')
-            start_date = dateparser.parse(date_range[0].strip())
-            if 'present' in date_range[1].lower():
-                end_date = datetime.now()
-            else:
-                end_date = dateparser.parse(date_range[1].strip())
-            if start_date and end_date:
-                # Calculate duration in months
-                months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month)
-                experiences.append({
-                    'company': company,
-                    'role': role,
-                    'start_date': start_date,
-                    'end_date': end_date,
-                    'duration_months': months
-                })
-        except:
-            # If date parsing fails, still include the experience without dates
-            experiences.append({
-                'company': company,
-                'role': role,
-                'duration': duration
-            })
-    return experiences
-# Estimate seniority based on experience and skills
-def estimate_seniority(experiences, found_skills, job_title):
-    # Calculate total experience in years
-    total_months = sum(exp.get('duration_months', 0) for exp in experiences if 'duration_months' in exp)
-    total_years = total_months / 12
-    # Count leadership keywords in roles
-    leadership_keywords = ['lead', 'senior', 'manager', 'head', 'principal', 'architect', 'director']
-    leadership_count = 0
-    for exp in experiences:
-        role = exp.get('role', '').lower()
-        for keyword in leadership_keywords:
-            if keyword in role:
-                leadership_count += 1
-                break
-    # Calculate skill match percentage for must-have skills
-    must_have_skills = job_descriptions[job_title]["must_have"]
-    must_have_count = sum(1 for skill in must_have_skills if skill in [s.lower() for s in found_skills])
-    must_have_percentage = (must_have_count / len(must_have_skills)) * 100 if must_have_skills else 0
-    # Determine seniority level
-    if total_years < 3:
-        seniority = "Junior"
-    elif total_years < 6:
-        seniority = "Mid-level"
-    else:
-        seniority = "Senior"
-    # Adjust based on leadership roles and skill match
-    if leadership_count >= 2 and seniority != "Senior":
-        seniority = "Senior" if total_years >= 4 else seniority
-    if must_have_percentage < 50 and seniority == "Senior":
-        seniority = "Mid-level"
-    return seniority, total_years, leadership_count, must_have_percentage
-# Check for timeline inconsistencies
-def check_timeline_inconsistencies(experiences):
-    if not experiences:
-        return []
-    inconsistencies = []
-    sorted_experiences = sorted(
-        [exp for exp in experiences if 'start_date' in exp and 'end_date' in exp],
-        key=lambda x: x['start_date']
-    )
-    for i in range(len(sorted_experiences) - 1):
-        current = sorted_experiences[i]
-        next_exp = sorted_experiences[i + 1]
-        # Check for overlapping full-time roles
-        if current['end_date'] > next_exp['start_date']:
-            overlap_months = (current['end_date'].year - next_exp['start_date'].year) * 12 + \
-                            (current['end_date'].month - next_exp['start_date'].month)
-            if overlap_months > 1:  # Allow 1 month overlap for transitions
-                inconsistencies.append({
-                    'type': 'overlap',
-                    'description': f"Overlapping roles: {current['company']} and {next_exp['company']} " +
-                                   f"overlap by {overlap_months} months"
-                })
-    # Check for gaps in employment
-    for i in range(len(sorted_experiences) - 1):
-        current = sorted_experiences[i]
-        next_exp = sorted_experiences[i + 1]
-        gap_months = (next_exp['start_date'].year - current['end_date'].year) * 12 + \
-                     (next_exp['start_date'].month - current['end_date'].month)
-        if gap_months > 3:  # Flag gaps longer than 3 months
-            inconsistencies.append({
-                'type': 'gap',
-                'description': f"Employment gap of {gap_months} months between " +
-                               f"{current['company']} and {next_exp['company']}"
-            })
-    return inconsistencies
-# Verify company existence (simplified version)
-def verify_companies(experiences):
-    verification_results = []
-    for exp in experiences:
-        company = exp.get('company', '')
-        if not company:
-            continue
-        # Simple heuristic - companies less than 3 characters are suspicious
-        if len(company) < 3:
-            verification_results.append({
-                'company': company,
-                'status': 'suspicious',
-                'reason': 'Company name too short'
-            })
-            continue
-        # Check if company matches common fake patterns
-        fake_patterns = ['abc company', 'xyz corp', 'my company', 'personal project']
-        if any(pattern in company.lower() for pattern in fake_patterns):
-            verification_results.append({
-                'company': company,
-                'status': 'suspicious',
-                'reason': 'Matches pattern of fake company names'
-            })
-            continue
-        # In a real implementation, you'd call an API to check if the company exists
-        # For this demo, we'll just mark all others as verified
-        verification_results.append({
-            'company': company,
-            'status': 'verified',
-            'reason': 'Passed basic verification checks'
-        })
-    return verification_results
-# Extract skill levels from text
-def extract_skill_levels(text, skills):
-    skill_levels = {}
-    proficiency_indicators = {
-        'basic': ['basic', 'familiar', 'beginner', 'fundamentals', 'exposure'],
-        'intermediate': ['intermediate', 'proficient', 'experienced', 'competent', 'skilled'],
-        'advanced': ['advanced', 'expert', 'mastery', 'specialist', 'lead', 'senior']
-    }
-    for skill in skills:
-        # Look for sentences containing the skill
-        sentences = re.findall(r'[^.!?]*%s[^.!?]*[.!?]' % re.escape(skill), text.lower())
-        # Default level
-        level = 'intermediate'
-        # Check for years of experience indicators
-        years_pattern = re.compile(r'(\d+)\s*(?:\+)?\s*years?(?:\s+of)?\s+(?:experience|exp)?\s+(?:with|in|using)?\s+%s' % re.escape(skill), re.IGNORECASE)
-        for sentence in sentences:
-            years_match = years_pattern.search(sentence)
-            if years_match:
-                years = int(years_match.group(1))
-                if years < 2:
-                    level = 'basic'
-                elif years < 5:
-                    level = 'intermediate'
-                else:
-                    level = 'advanced'
-                break
-        # Check for proficiency indicators
-        if level == 'intermediate':  # Only override if not already set by years
-            for level_name, indicators in proficiency_indicators.items():
-                for indicator in indicators:
-                    pattern = re.compile(r'%s\s+(?:\w+\s+){0,3}%s' % (indicator, re.escape(skill)), re.IGNORECASE)
-                    if any(pattern.search(sentence) for sentence in sentences):
-                        level = level_name
-                        break
-                if level != 'intermediate':
-                    break
-        skill_levels[skill] = level
-    return skill_levels
-# Generate career trajectory prediction
-def predict_career_trajectory(experiences, seniority, job_title):
-    if not experiences:
-        return "Unable to predict trajectory due to insufficient experience data."
-    # Extract roles in chronological order
-    roles = [exp.get('role', '').lower() for exp in experiences if 'role' in exp]
-    # If less than 2 roles, not enough data for prediction
-    if len(roles) < 2:
-        if seniority == "Junior":
-            next_role = "Mid-level " + job_title
-        elif seniority == "Mid-level":
-            next_role = "Senior " + job_title
-        else:  # Senior
-            leadership_titles = {
-                "Software Engineer": "Technical Lead or Engineering Manager",
-                "Data Scientist": "Lead Data Scientist or Data Science Manager",
-                "Interaction Designer": "Design Lead or UX Director",
-                "Product Manager": "Senior Product Manager or Director of Product",
-                "DevOps Engineer": "DevOps Lead or Infrastructure Architect"
-            }
-            next_role = leadership_titles.get(job_title, f"Director of {job_title}")
-        return f"Based on current seniority level, the next logical role could be: {next_role}"
-    # Check for upward mobility patterns
-    progression_indicators = ['junior', 'senior', 'lead', 'manager', 'director', 'vp', 'head', 'chief']
-    current_level = -1
-    for role in roles:
-        for i, indicator in enumerate(progression_indicators):
-            if indicator in role:
-                if i > current_level:
-                    current_level = i
-    # Predict next role based on current level
-    if current_level < len(progression_indicators) - 1:
-        next_level = progression_indicators[current_level + 1]
-        # Map to specific job titles
-        if next_level == 'senior' and 'senior' not in roles[-1].lower():
-            next_role = f"Senior {job_title}"
-        elif next_level == 'lead':
-            next_role = f"{job_title} Lead"
-        elif next_level == 'manager':
-            if job_title == "Software Engineer":
-                next_role = "Engineering Manager"
-            else:
-                next_role = f"{job_title} Manager"
-        elif next_level == 'director':
-            next_role = f"Director of {job_title}s"
-        elif next_level == 'vp':
-            next_role = f"VP of {job_title}s"
-        elif next_level == 'head':
-            next_role = f"Head of {job_title}"
-        elif next_level == 'chief':
-            if job_title == "Software Engineer":
-                next_role = "CTO (Chief Technology Officer)"
-            elif job_title == "Data Scientist":
-                next_role = "Chief Data Officer"
-            elif job_title == "Product Manager":
-                next_role = "Chief Product Officer"
-            else:
-                next_role = f"Chief {job_title} Officer"
-        else:
-            next_role = f"{next_level.title()} {job_title}"
-    else:
-        next_role = "Executive Leadership or Strategic Advisory roles"
-    return f"Based on career progression, the next logical role could be: {next_role}"

 import streamlit as st
 import pdfplumber
 import re
 import pandas as pd
+import matplotlib.pyplot as plt
+import torch
+from datetime import datetime
 import plotly.express as px
 import plotly.graph_objects as go
+import numpy as np
+# Display startup message
 st.set_page_config(
     page_title="Resume Screener & Skill Extractor",
     page_icon="📄",
     layout="wide"
 )
+st.title("📄 Resume Screener & Skill Extractor")
+startup_message = st.empty()
+startup_message.info("Loading dependencies and models... This may take a minute on first run.")
+# Import dependencies with fallbacks.
+# Each optional package is imported inside its own try block; a module-level
+# *_available flag records the outcome so later code can branch on it, and a
+# Streamlit warning is shown when the import fails.
+try:
+    import spacy
+    spacy_available = True
+except ImportError:
+    spacy_available = False
+    st.warning("spaCy is not available. Some features will be limited.")
+# transformers supplies the summarization pipeline used by load_models().
+try:
+    from transformers import pipeline
+    transformers_available = True
+except ImportError:
+    transformers_available = False
+    st.warning("Transformers is not available. Summary generation will be limited.")
+try:
+    import nltk
+    from nltk.tokenize import word_tokenize
+    nltk_available = True
+    # Download required NLTK resources, but only when the 'punkt' tokenizer
+    # data cannot be found locally (nltk.data.find raises LookupError then).
     try:
+        nltk.data.find('tokenizers/punkt')
+    except LookupError:
+        nltk.download('punkt')
+except ImportError:
+    nltk_available = False
+    st.warning("NLTK is not available. Some text processing features will be limited.")
+# Custom sentence-transformers fallback.
+# Sets sentence_transformers_available and, whenever the package itself can be
+# imported, binds st_util to an object exposing pytorch_cos_sim. If the package
+# is missing entirely, st_util is never bound, so callers must check the flag
+# before touching it.
+try:
+    from sentence_transformers import SentenceTransformer
+    try:
+        from sentence_transformers import util as st_util
+        sentence_transformers_available = True
+    except ImportError:
+        # Define our own utility functions
+        class CustomSTUtil:
+            """Minimal stand-in for sentence_transformers.util (pytorch_cos_sim only)."""
+            @staticmethod
+            def pytorch_cos_sim(a, b):
+                """Return the cosine-similarity matrix between the rows of a and b.
+
+                Non-tensor inputs are converted with torch.tensor and 1-D inputs
+                are promoted to a single row, so the result is always 2-D.
+                """
+                if not isinstance(a, torch.Tensor):
+                    a = torch.tensor(a)
+                if not isinstance(b, torch.Tensor):
+                    b = torch.tensor(b)
+                if len(a.shape) == 1:
+                    a = a.unsqueeze(0)
+                if len(b.shape) == 1:
+                    b = b.unsqueeze(0)
+                # L2-normalise each row so the matrix product below yields cosines.
+                a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
+                b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
+                return torch.mm(a_norm, b_norm.transpose(0, 1))
+        st_util = CustomSTUtil()
+        sentence_transformers_available = True
+except ImportError:
+    sentence_transformers_available = False
+    st.warning("Sentence Transformers is not available. Semantic matching will be disabled.")
+# Load models with exception handling
+def _fallback_summarizer(text, **kwargs):
+    """Return the first five ". "-separated sentences of text as a summary.
+
+    The result mirrors the ``[{"summary_text": ...}]`` shape that callers read
+    from the transformers summarization pipeline, so both are interchangeable.
+    Generation keyword arguments are accepted and ignored.
+    """
+    return [{"summary_text": ". ".join(text.split(". ")[:5]) + "."}]
 @st.cache_resource
 def load_models():
+    """Load every optional model once and return them in a dict.
+
+    Returns:
+        dict with three keys that are always present:
+        ``'nlp'`` - the spaCy ``en_core_web_sm`` pipeline, or None;
+        ``'summarizer'`` - the BART summarization pipeline, or
+        ``_fallback_summarizer`` when it cannot be loaded;
+        ``'sentence_model'`` - the SentenceTransformer model, or None.
+
+    Load failures are reported with ``st.warning`` instead of being raised.
+    """
+    # Start from the degraded defaults; each section upgrades its own entry.
+    models = {
+        'nlp': None,
+        'summarizer': _fallback_summarizer,
+        'sentence_model': None,
+    }
+    # Load spaCy if available
+    if spacy_available:
+        try:
+            models['nlp'] = spacy.load("en_core_web_sm")
+        except OSError:
+            # Model package not installed yet: download it once, then retry.
+            try:
+                import subprocess
+                import sys
+                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
+                models['nlp'] = spacy.load("en_core_web_sm")
+            except Exception as e:
+                st.warning(f"Could not load spaCy model: {e}")
+    # Load summarizer if transformers available
+    if transformers_available:
+        try:
+            models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn")
+        except Exception as e:
+            st.warning(f"Could not load summarizer model: {e}")
+    # Load sentence transformer if available
+    if sentence_transformers_available:
+        try:
+            models['sentence_model'] = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+        except Exception as e:
+            st.warning(f"Could not load sentence transformer model: {e}")
+    return models
+# Job descriptions dictionary
 job_descriptions = {
     "Software Engineer": {
         "skills": ["python", "java", "javascript", "sql", "algorithms", "data structures",
             "Mid-level": "3-5 years of experience, model development, feature engineering",
             "Senior": "6+ years of experience, advanced ML techniques, research experience"
         }
     }
 }
+# Core functionality
 def extract_text_from_pdf(pdf_file):
+    """Extract text from PDF file.
+
+    Args:
+        pdf_file: path or file-like object accepted by ``pdfplumber.open``.
+
+    Returns:
+        The text of all pages joined with newlines. Pages read before an
+        error are still returned; the error itself is shown via ``st.error``.
+    """
+    page_texts = []
+    try:
+        with pdfplumber.open(pdf_file) as pdf:
+            for page in pdf.pages:
+                # Fall back to "" when a page yields no text (e.g. scanned images).
+                page_texts.append(page.extract_text() or "")
+    except Exception as e:
+        st.error(f"Error extracting text from PDF: {e}")
+    # Join with a newline so the last word of one page is not fused with the
+    # first word of the next, which would corrupt keyword and regex matching.
+    return "\n".join(page_texts)
+def extract_skills(text, job_title, nlp=None):
+    """Extract skills from resume text.
+
+    Args:
+        text: full resume text.
+        job_title: key into ``job_descriptions`` whose "skills" list is searched for.
+        nlp: unused; kept so existing callers that pass a spaCy pipeline still work.
+
+    Returns:
+        The required skills that occur in text, in the order they are listed.
+    """
     required_skills = job_descriptions[job_title]["skills"]
+    # Lowercase once instead of once per skill.
+    haystack = text.lower()
+    found_skills = []
+    # Simple keyword matching (no NLP needed)
     for skill in required_skills:
+        # Match whole tokens only: a plain substring test would credit "java"
+        # for "javascript" or "sql" for "mysql". Lookarounds are used instead
+        # of \b so skills ending in punctuation (e.g. "c++") still match.
+        token_pattern = r"(?<!\w)" + re.escape(skill.lower()) + r"(?!\w)"
+        if re.search(token_pattern, haystack):
             found_skills.append(skill)
+    return found_skills
+# "<first> (-, |, • or 'at') <second> (-, |, • or 'from') YYYY - YYYY|present|current"
+_EXPERIENCE_PATTERN = re.compile(
+    r"(?i)(?P<first>\w+[\w\s&,.']+)\s*(?P<sep>[-|•]|\bat\b)\s*"
+    r"(?P<second>[A-Za-z][\w\s&,.']+)\s*(?:[-|•]|\bfrom\b)\s*"
+    r"(?P<dates>\d{4}(?:\s*[-–]\s*(?:\d{4}|present|current)))"
+)
+def _parse_year_range(duration):
+    """Turn "YYYY - YYYY" / "YYYY - present" into (start_date, end_date).
+
+    Years carry no month, so a role is assumed to start on 1 January and, when
+    an end year is given, to end on 31 December. Open-ended ranges end now.
+
+    Raises:
+        ValueError: if a year is not a valid calendar year or the range ends
+            before it starts.
+    """
+    date_parts = re.split(r'[-–]', duration)
+    start_date = datetime(int(date_parts[0].strip()), 1, 1)
+    open_ended = len(date_parts) < 2 or any(
+        word in date_parts[1].lower() for word in ('present', 'current')
+    )
+    if open_ended:
+        end_date = datetime.now()
+    else:
+        end_date = datetime(int(date_parts[1].strip()), 12, 31)
+    if end_date < start_date:
+        raise ValueError(f"range ends before it starts: {duration}")
+    return start_date, end_date
+def extract_experience(text):
+    """Extract work experience from resume text.
+
+    Returns:
+        A list with one dict per match. Each dict has 'company' and 'role';
+        when the years parse it also has 'start_date', 'end_date' and
+        'duration_months', otherwise the raw year range under 'duration'.
+    """
+    experiences = []
+    for match in _EXPERIENCE_PATTERN.finditer(text):
+        first = match.group('first').strip()
+        second = match.group('second').strip()
+        duration = match.group('dates').strip()
+        if match.group('sep').lower() == 'at':
+            # "<role> at <company>": the role is written first.
+            role, company = first, second
+        else:
+            # Symbol separators keep the established "<company> - <role>" reading.
+            company, role = first, second
+        entry = {'company': company, 'role': role}
+        # Process dates
+        try:
+            start_date, end_date = _parse_year_range(duration)
+        except ValueError:
+            # Unusable dates: keep the entry but without a computed duration,
+            # so it cannot distort the experience total.
+            entry['duration'] = duration
+        else:
+            entry['start_date'] = start_date
+            entry['end_date'] = end_date
+            entry['duration_months'] = (
+                (end_date.year - start_date.year) * 12
+                + (end_date.month - start_date.month)
+            )
+        experiences.append(entry)
+    return experiences
+def analyze_resume(text, job_title, models):
+    """Analyze resume text against the requirements stored for job_title.
+
+    Args:
+        text: full resume text.
+        job_title: key into ``job_descriptions``.
+        models: dict as returned by ``load_models`` ('nlp', 'summarizer',
+            'sentence_model'); missing or None entries trigger fallbacks.
+
+    Returns:
+        dict with the keys 'found_skills', 'skill_levels', 'summary',
+        'experiences', 'match_score', 'seniority', 'years_experience',
+        'inconsistencies' and 'career_prediction'.
+    """
+    # Lowercase once; the skill-level scan below searches it repeatedly.
+    text_lower = text.lower()
+    required_skills = job_descriptions[job_title]["skills"]
+    # Extract skills
+    found_skills = extract_skills(text, job_title, models.get('nlp'))
     # Generate summary
+    if models.get('summarizer'):
+        try:
+            summary = models['summarizer'](text[:3000], max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
+        except Exception as e:
+            st.warning(f"Error generating summary: {e}")
+            summary = text[:500] + "..."
+    else:
+        summary = text[:500] + "..."
+    # Extract work experience
     experiences = extract_experience(text)
     # Calculate semantic match score
+    # Start from the keyword-based score so a missing model or a failed
+    # embedding still yields a meaningful number instead of 0.
+    if required_skills:
+        match_score = (len(found_skills) / len(required_skills)) * 100
+    else:
+        match_score = 0
+    if models.get('sentence_model') and sentence_transformers_available:
+        try:
+            resume_embedding = models['sentence_model'].encode(text[:5000], convert_to_tensor=True)
+            job_embedding = models['sentence_model'].encode(job_descriptions[job_title]["description"], convert_to_tensor=True)
+            match_score = float(st_util.pytorch_cos_sim(resume_embedding, job_embedding)[0][0]) * 100
+        except Exception as e:
+            st.warning(f"Error calculating semantic match: {e}")
+    # Calculate seniority level
+    years_exp = sum(exp.get('duration_months', 0) for exp in experiences) / 12
+    if years_exp < 3:
+        seniority = "Junior"
+    elif years_exp < 6:
+        seniority = "Mid-level"
+    else:
+        seniority = "Senior"
+    # Detect skill levels
+    skill_levels = {}
+    for skill in found_skills:
+        # Phrases are compared against lowercased text, so lowercase the skill too.
+        skill_key = skill.lower()
+        # Default level
+        skill_levels[skill] = "intermediate"
+        # Look for advanced indicators
+        advanced_patterns = [
+            f"expert in {skill_key}",
+            f"advanced {skill_key}",
+            f"extensive experience with {skill_key}"
+        ]
+        if any(pattern in text_lower for pattern in advanced_patterns):
+            skill_levels[skill] = "advanced"
+        # Look for basic indicators (checked last, so they win over advanced ones)
+        basic_patterns = [
+            f"familiar with {skill_key}",
+            f"basic knowledge of {skill_key}",
+            f"introduced to {skill_key}"
+        ]
+        if any(pattern in text_lower for pattern in basic_patterns):
+            skill_levels[skill] = "basic"
+    # Check for inconsistencies in timeline
+    inconsistencies = []
+    if len(experiences) >= 2:
+        # Sort experiences by start date
+        sorted_exps = sorted(
+            [exp for exp in experiences if 'start_date' in exp],
+            key=lambda x: x['start_date']
+        )
+        # Check for overlaps between consecutive roles
+        for current, next_exp in zip(sorted_exps, sorted_exps[1:]):
+            # Dates are only known to the year, so compare years: comparing the
+            # full dates would flag "2018-2020" followed by "2020-2022".
+            if current['end_date'].year > next_exp['start_date'].year:
+                inconsistencies.append({
+                    'type': 'overlap',
+                    'description': f"Overlapping roles at {current['company']} and {next_exp['company']}"
+                })
+    # Generate a simple career prediction
+    career_prediction = predict_career_path(seniority, job_title)
     return {
         'found_skills': found_skills,
+        'skill_levels': skill_levels,
+        'summary': summary,
         'experiences': experiences,
         'match_score': match_score,
         'seniority': seniority,
+        'years_experience': years_exp,
         'inconsistencies': inconsistencies,
         'career_prediction': career_prediction
     }
+def predict_career_path(seniority, job_title):
+    """Suggest the next career step for a seniority tier and job title.
+
+    "Junior" always maps to the senior variant of the title. "Mid-level" and
+    every other tier (treated as senior) use a per-title suggestion with a
+    generic default for titles that are not listed.
+    """
+    if seniority == "Junior":
+        return f"Next potential role: Senior {job_title}"
+    if seniority == "Mid-level":
+        suggestions = {
+            "Software Engineer": "Team Lead, Technical Lead, or Engineering Manager",
+            "Data Scientist": "Senior Data Scientist or Data Science Lead",
+            "Interaction Designer": "Senior Designer or UX Lead",
+        }
+        default_suggestion = f"Senior {job_title}"
+    else:
+        # Anything that is neither junior nor mid-level is handled as senior.
+        suggestions = {
+            "Software Engineer": "Engineering Manager, Software Architect, or CTO",
+            "Data Scientist": "Head of Data Science, ML Engineering Manager, or Chief Data Officer",
+            "Interaction Designer": "Design Director, Head of UX, or VP of Design",
+        }
+        default_suggestion = f"Director of {job_title}"
+    next_roles = suggestions.get(job_title, default_suggestion)
+    return f"Next potential roles: {next_roles}"
 def generate_career_advice(resume_text, job_title, found_skills, missing_skills):
+    """Generate career advice based on resume analysis.
+
+    Args:
+        resume_text: unused; kept for interface compatibility.
+        job_title: title used in the headings and to pick the project ideas.
+        found_skills: unused; kept for interface compatibility.
+        missing_skills: skills to recommend learning, one bullet each.
+
+    Returns:
+        A Markdown string with skill, project and learning-resource sections.
+    """
+    # Collect the pieces and join once instead of growing a string with +=.
+    parts = [f"""## Career Development Plan for {job_title}
+### Skills to Develop
+The following skills would strengthen your profile for this position:
+"""]
+    for skill in missing_skills:
+        skill_key = skill.lower()
+        # Whole words of the skill name: a substring test for "ui"/"ux" would
+        # also fire on unrelated skills such as "linux" or "build tools".
+        skill_words = set(re.findall(r"[a-z]+", skill_key))
+        if skill_key == "python":
+            tip = "Take online courses like Coursera's Python for Everybody or follow tutorials on Real Python."
+        elif skill_key == "java":
+            tip = "Complete the Oracle Java Certification or contribute to open-source Java projects."
+        elif skill_key == "javascript":
+            tip = "Build interactive web applications using modern frameworks like React or Vue."
+        elif skill_key == "cloud":
+            tip = "Get hands-on experience with AWS, Azure, or GCP through their free tier offerings."
+        elif "algorithm" in skill_key or "data structure" in skill_key:
+            tip = "Practice on platforms like LeetCode or HackerRank and study algorithm design principles."
+        elif "ui" in skill_words or "ux" in skill_words:
+            tip = "Create a portfolio of design work and study interaction design principles."
+        elif "machine learning" in skill_key:
+            tip = "Take Andrew Ng's Machine Learning course on Coursera and work on ML projects with real datasets."
+        else:
+            tip = "Research and practice this skill through online courses, tutorials, and hands-on projects."
+        parts.append(f"- **{skill.title()}**: {tip}\n\n")
+    parts.append(f"""
+### Project Ideas
+Consider these projects to showcase your skills for a {job_title} position:
+""")
+    # Titles without a dedicated list simply get no project suggestions.
+    if job_title == "Software Engineer":
+        parts.append("""
+1. **Full-Stack Web Application**: Build a complete web app with frontend, backend, and database
+2. **API Service**: Create a RESTful or GraphQL API with proper authentication and documentation
+3. **Open Source Contribution**: Contribute to relevant open-source projects in your area of interest
+""")
+    elif job_title == "Data Scientist":
+        parts.append("""
+1. **Predictive Model**: Build and deploy a machine learning model that solves a real-world problem
+2. **Data Dashboard**: Create an interactive visualization dashboard for complex datasets
+3. **Natural Language Processing**: Develop a text classification or sentiment analysis project
+""")
+    elif job_title == "Interaction Designer":
+        parts.append("""
+1. **Design System**: Create a comprehensive design system with components and usage guidelines
+2. **UX Case Study**: Document your design process for a real or fictional product improvement
+3. **Interactive Prototype**: Design a fully functional prototype that demonstrates your interaction design skills
+""")
+    parts.append("""
+### Learning Resources
+- **Online Platforms**: Coursera, Udemy, Pluralsight, LinkedIn Learning
+- **Practice Sites**: GitHub, HackerRank, LeetCode, Kaggle
+- **Communities**: Stack Overflow, Reddit programming communities, relevant Discord servers
 """)
+    return "".join(parts)
+# Load models
+models = load_models()
+# Clear startup message
+startup_message.empty()
+# App description
 st.markdown("""
 This app helps recruiters analyze resumes by:
 - Extracting relevant skills for specific job positions
             text = extract_text_from_pdf(uploaded_file)
             # Analyze resume
+            analysis_results = analyze_resume(text, job_title, models)
             # Calculate missing skills
             missing_skills = [skill for skill in job_descriptions[job_title]["skills"]
+                             if skill not in analysis_results['found_skills']]
         # Display results in tabs
+        tab1, tab2, tab3, tab4 = st.tabs([
             "📊 Skills Match",
             "📝 Resume Summary",
             "🎯 Skills Gap",
             "🚀 Career Advice"
         ])
         with tab1:
+            # Create two columns
             col1, col2 = st.columns(2)
             with col1:
                 # Display matched skills
                 st.subheader("🎯 Matched Skills")
+                if analysis_results['found_skills']:
+                    for skill in analysis_results['found_skills']:
                         # Show skill with proficiency level
+                        level = analysis_results['skill_levels'].get(skill, 'intermediate')
                         level_emoji = "🟢" if level == 'advanced' else "🟡" if level == 'intermediate' else "🟠"
                         st.success(f"{level_emoji} {skill.title()} ({level.title()})")
                     # Calculate match percentage
+                    match_percentage = len(analysis_results['found_skills']) / len(job_descriptions[job_title]["skills"]) * 100
                     st.metric("Skills Match", f"{match_percentage:.1f}%")
                 else:
                     st.warning("No direct skill matches found.")
             with col2:
                 # Display semantic match score
                 st.subheader("💡 Semantic Match")
+                st.metric("Overall Match Score", f"{analysis_results['match_score']:.1f}%")
                 # Display must-have skills match
                 must_have_skills = job_descriptions[job_title]["must_have"]
+                must_have_count = sum(1 for skill in must_have_skills if skill in analysis_results['found_skills'])
                 must_have_percentage = (must_have_count / len(must_have_skills)) * 100
                 st.write("Must-have skills:")
                 # Professional level assessment
                 st.subheader("🧠 Seniority Assessment")
+                st.info(f"**{analysis_results['seniority']}** ({analysis_results['years_experience']:.1f} years equivalent experience)")
+                st.write(job_descriptions[job_title]["seniority_levels"][analysis_results['seniority']])
         with tab2:
             # Display resume summary
             st.subheader("📝 Resume Summary")
+            st.write(analysis_results['summary'])
             # Display experience timeline
             st.subheader("⏳ Experience Timeline")
+            if analysis_results['experiences']:
                 # Convert experiences to dataframe for display
                 exp_data = []
+                for exp in analysis_results['experiences']:
                     if 'start_date' in exp and 'end_date' in exp:
                         exp_data.append({
                             'Company': exp['company'],
                     st.dataframe(exp_df)
                     # Create a timeline visualization if dates are available
+                    timeline_data = [exp for exp in analysis_results['experiences'] if 'start_date' in exp and 'end_date' in exp]
+                    if timeline_data and len(timeline_data) > 0:
+                        try:
+                            # Sort by start date
+                            timeline_data = sorted(timeline_data, key=lambda x: x['start_date'])
+                            # Create figure
+                            fig = go.Figure()
+                            for i, exp in enumerate(timeline_data):
+                                fig.add_trace(go.Bar(
+                                    x=[(exp['end_date'] - exp['start_date']).days / 30],  # Duration in months
+                                    y=[exp['company']],
+                                    orientation='h',
+                                    name=exp['role'],
+                                    hovertext=f"{exp['role']} at {exp['company']}",
+                                    marker=dict(color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)])
+                                ))
+                            fig.update_layout(
+                                title="Career Timeline",
+                                xaxis_title="Duration (months)",
+                                yaxis_title="Company",
+                                height=400,
+                                margin=dict(l=0, r=0, b=0, t=30)
+                            )
+                            st.plotly_chart(fig, use_container_width=True)
+                        except Exception as e:
+                            st.warning(f"Could not create timeline visualization: {e}")
             else:
                 st.warning("No work experience data could be extracted.")
                 # Show must-have skills that are missing
                 missing_must_have = [skill for skill in job_descriptions[job_title]["must_have"]
+                                   if skill not in analysis_results['found_skills']]
                 if missing_must_have:
                     st.error("**Critical Skills Missing:**")
                 # Show nice-to-have skills gap
                 missing_nice_to_have = [skill for skill in job_descriptions[job_title]["nice_to_have"]
+                                      if skill not in analysis_results['found_skills']]
                 if missing_nice_to_have:
                     st.warning("**Nice-to-Have Skills Missing:**")
                         st.write(f"- {skill.title()}")
                 else:
                     st.success("Candidate has all the nice-to-have skills!")
+            # Display career trajectory
+            st.subheader("👨‍💼 Career Trajectory")
+            st.info(analysis_results['career_prediction'])
+        with tab4:
             # Display career advice
             st.subheader("🚀 Career Advice and Project Recommendations")
             if st.button("Generate Career Advice"):
                 with st.spinner("Generating personalized career advice..."):
+                    advice = generate_career_advice(text, job_title, analysis_results['found_skills'], missing_skills)
                     st.markdown(advice)
     except Exception as e:
         st.error(f"An error occurred while processing the resume: {str(e)}")
+        st.exception(e)
 # Add footer
 st.markdown("---")
+st.markdown("Made with ❤️ using Streamlit and Hugging Face")

requirements.txt CHANGED Viewed

@@ -1,24 +1,22 @@
-streamlit==1.22.0
-pdfplumber==0.9.0
-spacy==3.5.3
-transformers==4.28.1
-torch==1.13.1
-huggingface-hub==0.14.1
 sentence-transformers==2.2.2
-nltk==3.8.1
-plotly==5.14.1
 pandas==1.5.3
 numpy==1.24.3
 matplotlib==3.7.1
-pydantic==1.10.8
-protobuf<4.0.0
-tqdm>=4.27
-regex>=2022.1.18
-scikit-learn==1.0.2
-scipy==1.8.1
-dateparser==1.1.8
-python-Levenshtein==0.21.1
-networkx==2.8.8
-faiss-cpu==1.7.4
-beautifulsoup4==4.12.2
-requests==2.31.0

+# Core dependencies - order matters!
+pydantic==1.10.8
+spacy==3.5.0
 sentence-transformers==2.2.2
+torch==1.13.1
+transformers==4.28.1
+# PDF processing
+pdfplumber==0.9.0
+# Web UI
+streamlit==1.22.0
+# Data processing
 pandas==1.5.3
 numpy==1.24.3
 matplotlib==3.7.1
+plotly==5.14.1
+# Utilities
+nltk==3.8.1
+scikit-learn==1.0.2