File size: 7,403 Bytes
ce8343a
 
c428eb8
ce8343a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0400195
ce8343a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import json
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
import torch
import os
import gradio_client.utils as client_utils
import sys


# ===============================
# Device and Model Setup
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hf_token = os.environ["HF_TOKEN"]

#Load the model
model_path = "AI-Mock-Interviewer/T5"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model.to(device)

model = torch.compile(model)

# ------------------- UPDATED SYSTEM PROMPT -------------------
system_prompt = """
You are conducting a mock technical interview. The candidate's experience level can be entry-level, mid-level, or senior-level. Generate questions and follow-up questions based on the domain and the candidate's experience level. Consider these aspects:
1. The question should be relevant to the domain (e.g., software engineering, machine learning) and appropriate for the candidate's experience level.
2. Ensure each question is unique and does not repeat previously asked questions.
3. Ensure each question covers a different sub-topic within the domain, avoiding redundancy.
4. If no clear follow-up can be derived, generate a fresh, related question from a different aspect of the domain.
Important: Ensure that each question is clear, concise, and allows the candidate to demonstrate their technical and communicative abilities effectively.
"""

# Define sub-topic categories for different domains
subtopic_keywords = {
    "data analysis": [
        "data cleaning", "missing data", "outliers",
        "feature engineering", "EDA", "trend analysis",
        "data visualization"
    ],
    "machine learning": [
        "supervised learning", "unsupervised learning",
        "model evaluation", "bias-variance tradeoff",
        "overfitting", "hyperparameter tuning"
    ],
    "software engineering": [
        "agile methodology", "code optimization",
        "design patterns", "database design",
        "testing strategies"
    ],
}

def identify_subtopic(question, domain):
    """Identify the sub-topic of a question using predefined keywords."""
    domain = domain.lower()
    if domain in subtopic_keywords:
        for subtopic in subtopic_keywords[domain]:
            if subtopic in question.lower():
                return subtopic
    return None

# We'll keep global sets here only if needed as a fallback:
asked_questions = set()
asked_subtopics = set()

def generate_question(prompt, domain, state=None):
    """
    Generates a unique question based on the prompt and domain.
    Uses 'state' to track uniqueness in the conversation session.
    """
    while True:
        full_prompt = system_prompt + "\n" + prompt
        inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            top_k=30,
            top_p=0.9,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        question = tokenizer.decode(outputs[0], skip_special_tokens=True)
        question = question.replace(full_prompt, "").strip()

        # Ensure question ends with a question mark
        if not question.endswith("?"):
            question = question.split("?")[0] + "?"

        # Identify the subtopic
        subtopic = identify_subtopic(question, domain)

        if state is not None:
            # Use session-level sets to ensure uniqueness
            if (question not in state["asked_questions"] and
                (subtopic is None or subtopic not in state["asked_subtopics"])):
                state["asked_questions"].add(question)
                if subtopic:
                    state["asked_subtopics"].add(subtopic)
                return question
        else:
            # Fallback to global sets if no state is provided
            if question not in asked_questions and (subtopic is None or subtopic not in asked_subtopics):
                asked_questions.add(question)
                if subtopic:
                    asked_subtopics.add(subtopic)
                return question

def reset_state(domain, company, level):
    """
    Resets or initializes the session state.
    """
    return {
       "domain": domain,
       "company": company,
       "level": level,
       "asked_questions": set(),
       "asked_subtopics": set(),
       "conversation": []  # List of (speaker, message) tuples
    }

def start_interview(domain, company, level):
    """
    Initializes a new interactive interview session with the first question.
    """
    state = reset_state(domain, company, level)
    prompt = (f"Domain: {domain}. "
              + (f"Company: {company}. " if company else "")
              + f"Candidate experience level: {level}. "
                "Generate the first question:")

    question = generate_question(prompt, domain, state)
    state["conversation"].append(("Interviewer", question))
    return state["conversation"], state

def submit_response(candidate_response, state):
    """
    Accepts the candidate's response, updates conversation, generates the next question.
    Allows quitting the interview by typing 'quit'.
    """
    if candidate_response.strip().lower() == "quit":
        state["conversation"].append(("Candidate", candidate_response))
        state["conversation"].append(("Interviewer", "Interview session has ended. Thank you for participating!"))
        return state["conversation"], state

    state["conversation"].append(("Candidate", candidate_response))
    prompt = (f"Domain: {state['domain']}. "
              f"Candidate's experience level: {state['level']}. "
               "Generate the next interview question:")

    question = generate_question(prompt, state["domain"], state)
    state["conversation"].append(("Interviewer", question))
    return state["conversation"], state

# Build an interactive Gradio interface using Blocks
with gr.Blocks() as demo:
    gr.Markdown("# Interactive Mock Interview")
    with gr.Row():
        domain_input = gr.Textbox(label="Domain", placeholder="e.g. Software Engineering")
        company_input = gr.Textbox(label="Company (Optional)", placeholder="e.g. Google")
        level_input = gr.Dropdown(
            label="Experience Level",
            choices=["Entry-Level", "Mid-Level", "Senior-Level"],
            value="Entry-Level"
        )

    start_button = gr.Button("Start Interview")
    chatbot = gr.Chatbot(label="Interview Conversation", type="messages")

    with gr.Row():
        response_input = gr.Textbox(label="Your Response", placeholder="Type 'quit' to end the interview")
        submit_button = gr.Button("Submit")

    # State to hold session data
    state = gr.State()

    # Start interview
    start_button.click(
        start_interview,
        inputs=[domain_input, company_input, level_input],
        outputs=[chatbot, state]
    )

    # Submit response
    submit_button.click(
        submit_response,
        inputs=[response_input, state],
        outputs=[chatbot, state]
    ).then(
        lambda: "", None, response_input  # Clear input box after submission
    )


demo.launch()