import gradio as gr
from transformers import AutoTokenizer, T5ForConditionalGeneration
import torch
import gradio_client.utils as client_utils
import sys

# ======== Patch for Gradio JSON Schema Bug ========
# Keep a reference to the original so the wrapper below does not call itself
# (assigning the patch to the same module attribute would otherwise make the
# internal call recurse infinitely).
_original_json_schema_to_python_type = client_utils._json_schema_to_python_type

def _patched_json_schema_to_python_type(schema, defs=None):
    # Boolean schemas ("true"/"false") are valid JSON Schema but can crash
    # affected gradio_client versions; map them to permissive types instead.
    if isinstance(schema, bool):
        return "Any" if schema else "None"
    try:
        return _original_json_schema_to_python_type(schema, defs)
    except RecursionError:
        return "Any"

client_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
sys.setrecursionlimit(10000)
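# Note: the patch above targets a schema-parsing bug observed in some
# gradio_client releases; on releases without the bug, the wrapper simply
# delegates to the original function and is harmless.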

# ======== Setup Device ========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ======== Load Model and Tokenizer ========
model_name = "AI-Mock-Interviewer/T5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
model.eval()  # inference only; disables dropout

# Safety net: ensure a pad token exists (T5 tokenizers normally define one),
# so padding and attention masks are well defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ======== System Prompt ========
system_prompt = """
You are conducting a mock technical interview. Generate questions and follow-up questions based on the domain provided. Consider these aspects:
1. The question should be relevant to the domain (e.g., software engineering, machine learning).
2. For follow-up questions, analyze the candidate's last response and ask questions that probe deeper into their understanding, challenge their approach, or request clarification.
3. The follow-up question should aim to explore the candidate's depth of knowledge and ability to adapt.
4. Ensure each question is unique and does not repeat previously asked questions.
5. Ensure each question covers a different sub-topic within the domain, avoiding redundancy.
6. If no clear follow-up can be derived, generate a fresh, related question from a different aspect of the domain.
Important: Ensure that each question is clear, concise, and allows the candidate to demonstrate their technical and communicative abilities effectively.
"""

# ======== Subtopic Keywords ========
subtopic_keywords = {
    "data analysis": ["data cleaning", "missing data", "outliers", "feature engineering", "EDA", "trend analysis", "data visualization"],
    "machine learning": ["supervised learning", "unsupervised learning", "model evaluation", "bias-variance tradeoff", "overfitting", "hyperparameter tuning"],
    "software engineering": ["agile methodology", "code optimization", "design patterns", "database design", "testing strategies"],
}
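
# Domains outside this map still work: identify_subtopic() below returns None
# for them, so deduplication falls back to exact question matching only.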

def identify_subtopic(question, domain):
    domain = domain.lower()
    if domain in subtopic_keywords:
        for subtopic in subtopic_keywords[domain]:
            if subtopic in question.lower():
                return subtopic
    return None
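
# Illustrative example: identify_subtopic("How do you handle missing data?",
# "Data Analysis") returns "missing data"; unmatched questions return None.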

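# Generate a question for the domain, retrying up to max_attempts times until
# both the question text and its detected subtopic are new for this session.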
def generate_question(prompt, domain, state=None, max_attempts=10):
    attempts = 0
    while attempts < max_attempts:
        attempts += 1
        full_prompt = f"{system_prompt.strip()}\n{prompt.strip()}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            top_k=30,
            top_p=0.9,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        question = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        if not question.endswith("?"):
            question = question.split("?")[0] + "?"

        subtopic = identify_subtopic(question, domain)

        if state is None:
            # No dedup state supplied: accept the first generated question.
            return question
        if question not in state["asked_questions"] and (
            subtopic is None or subtopic not in state["asked_subtopics"]
        ):
            state["asked_questions"].add(question)
            if subtopic:
                state["asked_subtopics"].add(subtopic)
            return question

    raise RuntimeError("Failed to generate a unique question after multiple attempts.")

def reset_state(domain, company):
    return {
        "domain": domain,
        "company": company,
        "asked_questions": set(),
        "asked_subtopics": set(),
        "conversation": []  # List of dicts: {"role": ..., "content": ...}
    }

def start_interview(domain, company):
    state = reset_state(domain, company)
    prompt = f"Domain: {domain}. " + (f"Company: {company}. " if company else "") + "Generate the first question:"
    question = generate_question(prompt, domain, state)
    state["conversation"].append({"role": "assistant", "content": question})
    return state["conversation"], state

def submit_response(candidate_response, state):
    # Ignore submissions made before the interview has started, or empty input.
    if not state or not (candidate_response or "").strip():
        return (state or {}).get("conversation", []), state
    state["conversation"].append({"role": "user", "content": candidate_response})
    prompt = f"Domain: {state['domain']}. Candidate's last response: {candidate_response}. Generate a follow-up question with a new perspective:"
    question = generate_question(prompt, state["domain"], state)
    state["conversation"].append({"role": "assistant", "content": question})
    return state["conversation"], state

# ======== Gradio Interface ========
with gr.Blocks() as demo:
    gr.Markdown("# πŸŽ™οΈ Interactive Mock Interview")

    with gr.Row():
        domain_input = gr.Textbox(label="Domain")
        company_input = gr.Textbox(label="Company (Optional)")

    start_button = gr.Button("πŸš€ Start Interview")
    chatbot = gr.Chatbot(label="Interview Conversation", type="messages")

    with gr.Row():
        response_input = gr.Textbox(label="Your Response")
        submit_button = gr.Button("Submit")

    state = gr.State({})  # Session state holder

    # Hook buttons to logic
    start_button.click(start_interview, inputs=[domain_input, company_input], outputs=[chatbot, state])
    submit_button.click(submit_response, inputs=[response_input, state], outputs=[chatbot, state]).then(
        lambda: "", inputs=[], outputs=[response_input]  # Clear textbox after submit
    )

demo.launch()
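
# Deployment notes (assumed options, not part of the original app): to expose
# a temporary public URL or serve concurrent users, one might instead call:
#   demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)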