Spaces:
Running
Running
import json | |
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM | |
import torch | |
import os | |
import gradio_client.utils as client_utils | |
import sys | |
# =============================== | |
# Device and Model Setup | |
# =============================== | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
hf_token = os.environ["HF_TOKEN"] | |
#Load the model | |
model_path = "AI-Mock-Interviewer/T5" | |
tokenizer = AutoTokenizer.from_pretrained(model_path) | |
model = AutoModelForSeq2SeqLM.from_pretrained(model_path) | |
model.to(device) | |
model = torch.compile(model) | |
# ------------------- UPDATED SYSTEM PROMPT ------------------- | |
system_prompt = """ | |
You are conducting a mock technical interview. The candidate's experience level can be entry-level, mid-level, or senior-level. Generate questions and follow-up questions based on the domain and the candidate's experience level. Consider these aspects: | |
1. The question should be relevant to the domain (e.g., software engineering, machine learning) and appropriate for the candidate's experience level. | |
2. Ensure each question is unique and does not repeat previously asked questions. | |
3. Ensure each question covers a different sub-topic within the domain, avoiding redundancy. | |
4. If no clear follow-up can be derived, generate a fresh, related question from a different aspect of the domain. | |
Important: Ensure that each question is clear, concise, and allows the candidate to demonstrate their technical and communicative abilities effectively. | |
""" | |
# Define sub-topic categories for different domains | |
subtopic_keywords = { | |
"data analysis": [ | |
"data cleaning", "missing data", "outliers", | |
"feature engineering", "EDA", "trend analysis", | |
"data visualization" | |
], | |
"machine learning": [ | |
"supervised learning", "unsupervised learning", | |
"model evaluation", "bias-variance tradeoff", | |
"overfitting", "hyperparameter tuning" | |
], | |
"software engineering": [ | |
"agile methodology", "code optimization", | |
"design patterns", "database design", | |
"testing strategies" | |
], | |
} | |
def identify_subtopic(question, domain): | |
"""Identify the sub-topic of a question using predefined keywords.""" | |
domain = domain.lower() | |
if domain in subtopic_keywords: | |
for subtopic in subtopic_keywords[domain]: | |
if subtopic in question.lower(): | |
return subtopic | |
return None | |
# We'll keep global sets here only if needed as a fallback: | |
asked_questions = set() | |
asked_subtopics = set() | |
def generate_question(prompt, domain, state=None): | |
""" | |
Generates a unique question based on the prompt and domain. | |
Uses 'state' to track uniqueness in the conversation session. | |
""" | |
while True: | |
full_prompt = system_prompt + "\n" + prompt | |
inputs = tokenizer(full_prompt, return_tensors="pt").to(device) | |
outputs = model.generate( | |
inputs["input_ids"], | |
max_new_tokens=50, | |
num_return_sequences=1, | |
no_repeat_ngram_size=2, | |
top_k=30, | |
top_p=0.9, | |
temperature=0.7, | |
do_sample=True, | |
pad_token_id=tokenizer.eos_token_id, | |
) | |
question = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
question = question.replace(full_prompt, "").strip() | |
# Ensure question ends with a question mark | |
if not question.endswith("?"): | |
question = question.split("?")[0] + "?" | |
# Identify the subtopic | |
subtopic = identify_subtopic(question, domain) | |
if state is not None: | |
# Use session-level sets to ensure uniqueness | |
if (question not in state["asked_questions"] and | |
(subtopic is None or subtopic not in state["asked_subtopics"])): | |
state["asked_questions"].add(question) | |
if subtopic: | |
state["asked_subtopics"].add(subtopic) | |
return question | |
else: | |
# Fallback to global sets if no state is provided | |
if question not in asked_questions and (subtopic is None or subtopic not in asked_subtopics): | |
asked_questions.add(question) | |
if subtopic: | |
asked_subtopics.add(subtopic) | |
return question | |
def reset_state(domain, company, level): | |
""" | |
Resets or initializes the session state. | |
""" | |
return { | |
"domain": domain, | |
"company": company, | |
"level": level, | |
"asked_questions": set(), | |
"asked_subtopics": set(), | |
"conversation": [] # List of (speaker, message) tuples | |
} | |
def start_interview(domain, company, level): | |
""" | |
Initializes a new interactive interview session with the first question. | |
""" | |
state = reset_state(domain, company, level) | |
prompt = (f"Domain: {domain}. " | |
+ (f"Company: {company}. " if company else "") | |
+ f"Candidate experience level: {level}. " | |
"Generate the first question:") | |
question = generate_question(prompt, domain, state) | |
state["conversation"].append(("Interviewer", question)) | |
return state["conversation"], state | |
def submit_response(candidate_response, state): | |
""" | |
Accepts the candidate's response, updates conversation, generates the next question. | |
Allows quitting the interview by typing 'quit'. | |
""" | |
if candidate_response.strip().lower() == "quit": | |
state["conversation"].append(("Candidate", candidate_response)) | |
state["conversation"].append(("Interviewer", "Interview session has ended. Thank you for participating!")) | |
return state["conversation"], state | |
state["conversation"].append(("Candidate", candidate_response)) | |
prompt = (f"Domain: {state['domain']}. " | |
f"Candidate's experience level: {state['level']}. " | |
"Generate the next interview question:") | |
question = generate_question(prompt, state["domain"], state) | |
state["conversation"].append(("Interviewer", question)) | |
return state["conversation"], state | |
# Build an interactive Gradio interface using Blocks | |
with gr.Blocks() as demo: | |
gr.Markdown("# Interactive Mock Interview") | |
with gr.Row(): | |
domain_input = gr.Textbox(label="Domain", placeholder="e.g. Software Engineering") | |
company_input = gr.Textbox(label="Company (Optional)", placeholder="e.g. Google") | |
level_input = gr.Dropdown( | |
label="Experience Level", | |
choices=["Entry-Level", "Mid-Level", "Senior-Level"], | |
value="Entry-Level" | |
) | |
start_button = gr.Button("Start Interview") | |
chatbot = gr.Chatbot(label="Interview Conversation", type="messages") | |
with gr.Row(): | |
response_input = gr.Textbox(label="Your Response", placeholder="Type 'quit' to end the interview") | |
submit_button = gr.Button("Submit") | |
# State to hold session data | |
state = gr.State() | |
# Start interview | |
start_button.click( | |
start_interview, | |
inputs=[domain_input, company_input, level_input], | |
outputs=[chatbot, state] | |
) | |
# Submit response | |
submit_button.click( | |
submit_response, | |
inputs=[response_input, state], | |
outputs=[chatbot, state] | |
).then( | |
lambda: "", None, response_input # Clear input box after submission | |
) | |
demo.launch() | |