import sys

import gradio as gr
import gradio_client.utils as client_utils
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
# ======== Patch for Gradio JSON Schema Bug ========
# Keep a reference to the original function; calling it through the module
# attribute after patching would recurse into the patch itself.
_original_json_schema_to_python_type = client_utils._json_schema_to_python_type

def _patched_json_schema_to_python_type(schema, defs=None, depth=0):
    if depth > 100:
        return "Any"
    # Boolean schemas are valid JSON Schema: true allows anything, false allows nothing.
    if isinstance(schema, bool):
        return "Any" if schema else "None"
    try:
        return _original_json_schema_to_python_type(schema, defs)
    except RecursionError:
        return "Any"

client_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
sys.setrecursionlimit(10000)
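# Illustrative failure case (a sketch, not taken from this app): a self-referential
# schema whose "$ref" target refers back to itself, e.g. {"$ref": "#/$defs/node"}
# with node defined in terms of itself, could previously drive
# _json_schema_to_python_type into unbounded recursion.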
# ======== Setup Device ========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ======== Load Model and Tokenizer ========
model_name = "AI-Mock-Interviewer/T5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)  # move weights to the selected device
model.eval()  # inference only; disables dropout

# Fix for attention mask warning
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
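# Optional memory saver (an assumption, not part of the original app): on a CUDA
# device the model can be loaded in half precision instead, e.g.
#   model = T5ForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16).to(device)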
# ======== System Prompt ======== | |
system_prompt = """ | |
You are conducting a mock technical interview. Generate questions and follow-up questions based on the domain provided. Consider these aspects: | |
1. The question should be relevant to the domain (e.g., software engineering, machine learning). | |
2. For follow-up questions, analyze the candidate's last response and ask questions that probe deeper into their understanding, challenge their approach, or request clarification. | |
3. The follow-up question should aim to explore the candidate's depth of knowledge and ability to adapt. | |
4. Ensure each question is unique and does not repeat previously asked questions. | |
5. Ensure each question covers a different sub-topic within the domain, avoiding redundancy. | |
6. If no clear follow-up can be derived, generate a fresh, related question from a different aspect of the domain. | |
Important: Ensure that each question is clear, concise, and allows the candidate to demonstrate their technical and communicative abilities effectively. | |
""" | |
# ======== Subtopic Keywords ========
subtopic_keywords = {
    "data analysis": ["data cleaning", "missing data", "outliers", "feature engineering", "EDA", "trend analysis", "data visualization"],
    "machine learning": ["supervised learning", "unsupervised learning", "model evaluation", "bias-variance tradeoff", "overfitting", "hyperparameter tuning"],
    "software engineering": ["agile methodology", "code optimization", "design patterns", "database design", "testing strategies"],
}
def identify_subtopic(question, domain):
    """Return the first subtopic keyword found in the question, or None if nothing matches."""
    domain = domain.lower()
    if domain in subtopic_keywords:
        for subtopic in subtopic_keywords[domain]:
            if subtopic in question.lower():
                return subtopic
    return None
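# Illustrative call (hypothetical inputs):
#   identify_subtopic("How do you handle missing data in a dataset?", "Data Analysis")
#   -> "missing data"; a question with no keyword match returns None.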
def generate_question(prompt, domain, state=None, max_attempts=10):
    attempts = 0
    while attempts < max_attempts:
        attempts += 1
        full_prompt = f"{system_prompt.strip()}\n{prompt.strip()}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # silences the attention-mask warning
            max_new_tokens=50,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            top_k=30,
            top_p=0.9,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        question = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        if not question.endswith("?"):
            question = question.split("?")[0] + "?"
        subtopic = identify_subtopic(question, domain)
        if state is None:
            return question  # no uniqueness bookkeeping without session state
        if question not in state["asked_questions"] and (subtopic is None or subtopic not in state["asked_subtopics"]):
            state["asked_questions"].add(question)
            if subtopic:
                state["asked_subtopics"].add(subtopic)
            return question
    raise RuntimeError("Failed to generate a unique question after multiple attempts.")
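# Stateless sketch (assumes the model and tokenizer are loaded as above):
#   q = generate_question("Domain: machine learning. Generate the first question:", "machine learning")
# With state=None the question is returned directly, without uniqueness tracking.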
def reset_state(domain, company):
    return {
        "domain": domain,
        "company": company,
        "asked_questions": set(),
        "asked_subtopics": set(),
        "conversation": []  # list of dicts: {"role": ..., "content": ...}
    }
def start_interview(domain, company):
    state = reset_state(domain, company)
    prompt = f"Domain: {domain}. " + (f"Company: {company}. " if company else "") + "Generate the first question:"
    question = generate_question(prompt, domain, state)
    state["conversation"].append({"role": "assistant", "content": question})
    return state["conversation"], state
def submit_response(candidate_response, state):
    if not state:
        raise gr.Error("Start the interview before submitting a response.")
    state["conversation"].append({"role": "user", "content": candidate_response})
    prompt = (
        f"Domain: {state['domain']}. "
        f"Candidate's last response: {candidate_response}. "
        "Generate a follow-up question with a new perspective:"
    )
    question = generate_question(prompt, state["domain"], state)
    state["conversation"].append({"role": "assistant", "content": question})
    return state["conversation"], state
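# Minimal console walkthrough (a sketch that bypasses the Gradio UI entirely):
#   conversation, state = start_interview("software engineering", "")
#   print(conversation[-1]["content"])   # first question
#   conversation, state = submit_response("I'd reach for unit tests and CI.", state)
#   print(conversation[-1]["content"])   # follow-up question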
# ======== Gradio Interface ========
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Interactive Mock Interview")
    with gr.Row():
        domain_input = gr.Textbox(label="Domain")
        company_input = gr.Textbox(label="Company (Optional)")
    start_button = gr.Button("🚀 Start Interview")
    chatbot = gr.Chatbot(label="Interview Conversation", type="messages")
    with gr.Row():
        response_input = gr.Textbox(label="Your Response")
        submit_button = gr.Button("Submit")
    state = gr.State({})  # session state holder

    # Hook buttons to logic
    start_button.click(start_interview, inputs=[domain_input, company_input], outputs=[chatbot, state])
    submit_button.click(submit_response, inputs=[response_input, state], outputs=[chatbot, state]).then(
        lambda: "", inputs=[], outputs=[response_input]  # clear textbox after submit
    )

demo.launch()
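# On Hugging Face Spaces, demo.launch() picks up the host and port automatically.
# For local testing, a variant such as
#   demo.launch(server_name="0.0.0.0", server_port=7860)
# (standard Gradio launch parameters) exposes the app on the local network.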