# Source: Hugging Face Space (status when captured: Sleeping)
""" | |
Hotel Review Analysis System for The Kimberley Hotel Hong Kong | |
ISOM5240 Group Project | |
This Streamlit application analyzes guest reviews in multiple languages, performs sentiment | |
analysis and aspect detection, then generates professional responses. | |
""" | |
import html
import re

import pyperclip
import streamlit as st
import torch
from langdetect import detect
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)
# ===== CONSTANTS =====
# Strict character limit for reviews as per requirements.
MAX_CHARS = 500

# Language codes the app accepts, mapped to the names shown in the UI.
# Note: the Chinese model handles both Mandarin and Cantonese text.
SUPPORTED_LANGUAGES = dict(
    en='English',
    zh='Chinese (Mandarin/Cantonese)',
    ja='Japanese',
    ko='Korean',
    fr='French',
    de='German',
)
# ===== ASPECT CONFIGURATION =====
# Maps each aspect label to the trigger keywords that make it a candidate.
# The keys double as the candidate labels handed to the zero-shot classifier,
# so renaming a key changes both the keyword stage and the classification stage.
# NOTE(review): detect_aspects() searches a lower-cased copy of the review, so
# keywords should be lower-case (or be normalised at match time) to be able to hit.
# NOTE(review): a few keywords ("towels", "meal") appear under more than one
# aspect; one mention can therefore nominate several aspects.
aspect_map = {
    # Location related aspects
    "location": ["location", "near", "close", "access", "transport", "distance", "area", "tsim sha tsui", "kowloon"],
    "view": ["view", "scenery", "vista", "panorama", "outlook", "skyline"],
    "parking": ["parking", "valet", "garage", "car park", "vehicle"],
    # Room related aspects
    "room comfort": ["comfortable", "bed", "pillows", "mattress", "linens", "cozy", "hard", "soft"],
    "room cleanliness": ["clean", "dirty", "spotless", "stains", "hygiene", "sanitation", "dusty"],
    "room amenities": ["amenities", "minibar", "coffee", "tea", "fridge", "facilities", "tv", "kettle"],
    "bathroom": ["bathroom", "shower", "toilet", "sink", "towel", "faucet", "toiletries"],
    # Service related aspects
    "staff service": ["staff", "friendly", "helpful", "rude", "welcoming", "employee", "manager"],
    "reception": ["reception", "check-in", "check-out", "front desk", "welcome", "registration"],
    "housekeeping": ["housekeeping", "maid", "cleaning", "towels", "service", "turndown"],
    "concierge": ["concierge", "recommendation", "advice", "tips", "guidance", "directions"],
    "room service": ["room service", "food delivery", "order", "meal", "tray"],
    # Facilities aspects
    "dining": ["breakfast", "dinner", "restaurant", "meal", "food", "buffet", "lunch"],
    "bar": ["bar", "drinks", "cocktail", "wine", "lounge", "happy hour"],
    "pool": ["pool", "swimming", "jacuzzi", "sun lounger", "deck", "towels"],
    "spa": ["spa", "massage", "treatment", "relax", "wellness", "sauna"],
    "fitness": ["gym", "fitness", "exercise", "workout", "training", "weights"],
    # Technical aspects
    "Wi-Fi": ["wifi", "internet", "connection", "online", "network", "speed"],
    "AC": ["air conditioning", "AC", "temperature", "heating", "cooling", "ventilation"],
    "elevator": ["elevator", "lift", "escalator", "vertical transport", "wait"],
    # Value aspects
    "pricing": ["price", "expensive", "cheap", "value", "rate", "cost", "worth"],
    "extra charges": ["charge", "fee", "bill", "surcharge", "additional", "hidden"]
}
# Pre-defined professional responses for positive aspects.
# Keyed by aspect label (same labels as aspect_map); each value is a complete
# sentence that generate_response() appends as its own paragraph.
aspect_responses = {
    "location": "We're delighted you enjoyed our prime location in the heart of Tsim Sha Tsui.",
    "view": "It's wonderful to hear you appreciated the views from your room.",
    "room comfort": "Our team takes special care to ensure room comfort for all guests.",
    # ... (other responses remain unchanged)
}
# Improvement actions for negative aspects.
# Keyed by aspect label; each value is a verb phrase that generate_response()
# slots into "Regarding your comments about the <aspect>, we <action>."
improvement_actions = {
    "AC": "have addressed the air conditioning issues",
    "housekeeping": "have reviewed our cleaning procedures",
    # ... (other actions remain unchanged)
}
# ===== MODEL CONFIGURATION =====
# Helsinki-NLP translation models, keyed by "<source>-<target>" language pair.
# Every non-English language gets a model into English (used for analysis)
# followed by a model out of English (used for the reply).
_NON_ENGLISH_LANGS = ('zh', 'ja', 'ko', 'fr', 'de')  # Chinese, Japanese, Korean, French, German

TRANSLATION_MODELS = {
    # Translations to English (for analysis)
    **{f"{lang}-en": f"Helsinki-NLP/opus-mt-{lang}-en" for lang in _NON_ENGLISH_LANGS},
    # Translations from English (for responses)
    **{f"en-{lang}": f"Helsinki-NLP/opus-mt-en-{lang}" for lang in _NON_ENGLISH_LANGS},
}
# ===== MODEL LOADING FUNCTIONS ===== | |
@st.cache_resource
def load_sentiment_model():
    """
    Load and cache the fine-tuned sentiment analysis model.

    The weights come from the "smtsead/fine_tuned_bertweet_hotel" checkpoint and
    the tokenizer from "finiteautomata/bertweet-base-sentiment-analysis".

    The function is wrapped in ``st.cache_resource`` so the model is built once
    and reused; without it every analysis would load the model again, because
    Streamlit re-executes the script on each interaction.

    Returns:
        tuple: (model, tokenizer)
    """
    model = AutoModelForSequenceClassification.from_pretrained("smtsead/fine_tuned_bertweet_hotel")
    tokenizer = AutoTokenizer.from_pretrained('finiteautomata/bertweet-base-sentiment-analysis')
    return model, tokenizer
@st.cache_resource
def load_aspect_classifier():
    """
    Load and cache the zero-shot aspect classifier.

    Builds a ``zero-shot-classification`` pipeline on the
    "MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33" checkpoint. Wrapped in
    ``st.cache_resource`` so the pipeline is created once instead of on every
    analysis (Streamlit re-executes the script on each interaction).

    Returns:
        pipeline: Zero-shot classification pipeline
    """
    return pipeline("zero-shot-classification", model="MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33")
@st.cache_resource
def load_translation_model(src_lang, target_lang='en'):
    """
    Load and cache the appropriate Helsinki-NLP translation model.

    Wrapped in ``st.cache_resource`` so each language pair's pipeline is built
    once and then reused; the cache is keyed on the two language codes.

    Args:
        src_lang (str): Source language code
        target_lang (str): Target language code (default 'en')

    Returns:
        pipeline: Translation pipeline

    Raises:
        ValueError: If language pair is not supported
    """
    model_key = f"{src_lang}-{target_lang}"
    if model_key not in TRANSLATION_MODELS:
        raise ValueError(f"Unsupported translation: {src_lang}→{target_lang}")
    return pipeline("translation", model=TRANSLATION_MODELS[model_key])
# ===== CORE FUNCTIONS ===== | |
def translate_text(text, src_lang, target_lang='en'):
    """
    Translate text between supported languages using Helsinki-NLP models.

    Args:
        text (str): Text to translate
        src_lang (str): Source language code
        target_lang (str): Target language code (default 'en')

    Returns:
        dict: On success, a dict with the keys 'original', 'translation',
        'source_lang' and 'target_lang'. On failure (unsupported pair, model
        load or inference error), a dict with the single key 'error' holding
        the exception message. The function never raises for those failures.
    """
    # Same language on both sides: nothing to translate. Return the same shape
    # as a real translation so callers can read every key unconditionally.
    if src_lang == target_lang:
        return {
            'original': text,
            'translation': text,
            'source_lang': src_lang,
            'target_lang': target_lang
        }

    # Only model loading and inference can fail; keep the try block that small.
    try:
        translator = load_translation_model(src_lang, target_lang)
        result = translator(text)[0]['translation_text']
    except Exception as e:
        # Best-effort by design: the caller decides how to surface the message.
        return {'error': str(e)}

    return {
        'original': text,
        'translation': result,
        'source_lang': src_lang,
        'target_lang': target_lang
    }
def analyze_sentiment(text, model, tokenizer):
    """
    Classify the sentiment of a piece of text.

    Args:
        text (str): Text to analyze
        model: Sequence-classification model; called with the tokenizer output
            as keyword arguments and expected to expose ``.logits``
        tokenizer: Tokenizer matching ``model``

    Returns:
        dict: 'label' (index of the highest-probability class), 'confidence'
        (that probability formatted as a whole percentage, e.g. "88%") and
        'sentiment' ('NEGATIVE' for label 0, 'POSITIVE' for any other label)
    """
    encoded = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**encoded).logits

    class_probs = torch.softmax(logits, dim=-1)
    top_label = class_probs.argmax().item()
    top_prob = class_probs.max().item()

    if top_label:
        verdict = 'POSITIVE'
    else:
        verdict = 'NEGATIVE'

    return {
        'label': top_label,
        'confidence': f"{top_prob:.0%}",
        'sentiment': verdict
    }
def detect_aspects(text, aspect_classifier, aspect_keywords=None, min_score=0.6):
    """
    Detect hotel aspects mentioned in text using a two-stage approach:
    1. Keyword matching to identify candidate aspects
    2. Zero-shot classification to confirm and score those candidates

    Keywords are lower-cased and regex-escaped before matching. The text is
    lower-cased too, so a keyword written with capitals (such as "AC") would
    otherwise never match, and a keyword containing regex metacharacters would
    otherwise be interpreted as a pattern.

    Args:
        text (str): Text to analyze
        aspect_classifier: Zero-shot classification pipeline; called with
            ``candidate_labels``, ``multi_label`` and ``hypothesis_template``
            and expected to return a dict with 'labels' and 'scores'
        aspect_keywords (dict | None): Mapping of aspect label to keyword list.
            Defaults to the module-level ``aspect_map``
        min_score (float): Aspects must score strictly above this to be kept
            (default 0.6)

    Returns:
        list: (aspect, score) tuples, where score is a percentage string such
        as "90%"; empty when no keyword matches or nothing clears ``min_score``
    """
    keyword_table = aspect_map if aspect_keywords is None else aspect_keywords
    text_lower = text.lower()

    # Stage 1: whole-word keyword hits nominate candidate aspects
    relevant_aspects = [
        aspect
        for aspect, keywords in keyword_table.items()
        if any(re.search(rf'\b{re.escape(kw.lower())}\b', text_lower) for kw in keywords)
    ]
    if not relevant_aspects:
        # No candidates: skip the (expensive) classifier call entirely
        return []

    # Stage 2: the classifier scores each candidate independently (multi-label)
    result = aspect_classifier(
        text,
        candidate_labels=relevant_aspects,
        multi_label=True,
        hypothesis_template="This review discusses the hotel's {}."
    )
    return [
        (aspect, f"{score:.0%}")
        for aspect, score in zip(result['labels'], result['scores'])
        if score > min_score
    ]
def generate_response(sentiment, aspects, original_text):
    """
    Draft a reply letter from the sentiment result and the detected aspects.

    A sentiment label of 1 produces a thank-you letter quoting up to two canned
    aspect responses; any other label produces an apology-style letter listing
    up to two improvement actions. Aspects are taken in descending score order.

    Args:
        sentiment (dict): Sentiment analysis results; only 'label' is read
        aspects (list): (aspect, "NN%") tuples as produced by detect_aspects
        original_text (str): Review text, searched for "Mr./Ms./Mrs. <name>"

    Returns:
        str: Generated response, ending with the fixed signature block
    """
    # Personalization - the word following a Mr./Ms./Mrs. title, if any
    title_match = re.search(r"(Mr\.|Ms\.|Mrs\.)\s(\w+)", original_text, re.IGNORECASE)
    guest_name = f" {title_match.group(2)}" if title_match else ""

    def by_score_desc(found):
        # Scores arrive as strings like "87%": drop the sign, compare as numbers
        return sorted(found, key=lambda item: float(item[1][:-1]), reverse=True)

    if sentiment['label'] == 1:
        paragraphs = [
            f"Dear{guest_name or ' Valued Guest'},\n"
            "Thank you for choosing The Kimberley Hotel Hong Kong and for sharing your feedback."
        ]
        # At most two praise paragraphs, best-scoring aspects first
        praised = set()
        for aspect, _score in by_score_desc(aspects):
            if aspect not in aspect_responses or aspect in praised:
                continue
            paragraphs.append(aspect_responses[aspect])
            praised.add(aspect)
            if len(praised) >= 2:
                break
        paragraphs.append("We look forward to welcoming you back.")
        paragraphs.append("Best regards,")
    else:
        paragraphs = [
            f"Dear{guest_name or ' Guest'},\n"
            "Thank you for your feedback. We appreciate you taking the time to share your experience."
        ]
        # At most two improvement paragraphs, best-scoring aspects first
        addressed = set()
        for aspect, _score in by_score_desc(aspects):
            if aspect not in improvement_actions or aspect in addressed:
                continue
            paragraphs.append(
                f"Regarding your comments about the {aspect}, we {improvement_actions[aspect]}."
            )
            addressed.add(aspect)
            if len(addressed) >= 2:
                break
        paragraphs.append("Please don't hesitate to contact us if we can be of further assistance.")
        paragraphs.append("Sincerely,")

    signature = "\nSam Tse\nGuest Relations Manager\nThe Kimberley Hotel Hong Kong"
    return "\n\n".join(paragraphs) + signature
# ===== STREAMLIT UI ===== | |
def _inject_styles():
    """Emit the page's custom CSS (header, badges, character counter, result box)."""
    st.markdown("""
    <style>
    /* Header styling */
    .header {
        color: #003366;
        font-size: 28px;
        font-weight: bold;
        margin-bottom: 10px;
    }
    /* Subheader styling */
    .subheader {
        color: #666666;
        font-size: 16px;
        margin-bottom: 30px;
    }
    /* Language badge styling */
    .badge {
        background-color: #e6f2ff;
        color: #003366;
        padding: 3px 10px;
        border-radius: 15px;
        font-size: 14px;
        display: inline-block;
        margin: 0 5px 5px 0;
    }
    /* Character counter styling */
    .char-counter {
        font-size: 12px;
        color: #666;
        text-align: right;
        margin-top: -15px;
        margin-bottom: 15px;
    }
    /* Warning style for character limit */
    .char-counter.warning {
        color: #ff6b6b;
    }
    /* Result box styling */
    .result-box {
        border-left: 4px solid #003366;
        padding: 15px;
        background-color: #f9f9f9;
        margin: 20px 0;
        border-radius: 0 8px 8px 0;
        white-space: pre-wrap;
    }
    /* Aspect badge styling */
    .aspect-badge {
        background-color: #e6f2ff;
        color: #003366;
        padding: 2px 8px;
        border-radius: 4px;
        font-size: 14px;
        display: inline-block;
        margin: 2px;
    }
    </style>
    """, unsafe_allow_html=True)


def _analyze_review(review, review_lang):
    """
    Run translation, sentiment analysis, aspect detection and reply drafting.

    Returns the results dict that is kept in session state, or None when the
    review could not be translated to English (the error is already shown).
    """
    # Translation to English if needed
    if review_lang != 'en':
        translation = translate_text(review, review_lang, 'en')
        if 'error' in translation:
            st.error(f"Translation error: {translation['error']}")
            return None
        analysis_text = translation['translation']
    else:
        analysis_text = review

    # Load models
    sentiment_model, tokenizer = load_sentiment_model()
    aspect_classifier = load_aspect_classifier()

    # Perform analysis
    sentiment = analyze_sentiment(analysis_text, sentiment_model, tokenizer)
    aspects = detect_aspects(analysis_text, aspect_classifier)
    response = generate_response(sentiment, aspects, analysis_text)

    # Translate response back to original language if needed; fall back to
    # the English draft rather than failing the whole analysis
    final_response = response
    if review_lang != 'en':
        translation_back = translate_text(response, 'en', review_lang)
        if 'error' not in translation_back:
            final_response = translation_back['translation']
        else:
            st.warning(f"Couldn't translate response back: {translation_back['error']}")

    return {
        'sentiment': sentiment,
        'aspects': aspects,
        'response': final_response,
        'original_lang': review_lang
    }


def _render_results(results):
    """Display the stored sentiment, aspects and draft response, plus the copy button."""
    sentiment = results['sentiment']
    aspects = results['aspects']
    final_response = results['response']

    st.divider()
    col1, col2 = st.columns(2)
    # Sentiment analysis results
    with col1:
        st.markdown("### Sentiment Analysis")
        sentiment_icon = "✅" if sentiment['label'] == 1 else "⚠️"
        st.markdown(f"{sentiment_icon} **{sentiment['sentiment']}**")
        st.caption(f"Confidence level: {sentiment['confidence']}")
    # Detected aspects, highest score first
    with col2:
        st.markdown("### Key Aspects Detected")
        if aspects:
            for aspect, score in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
                st.markdown(f'<div class="aspect-badge">{aspect} ({score})</div>', unsafe_allow_html=True)
        else:
            st.markdown("_No specific aspects detected_")

    # Generated response
    st.divider()
    st.markdown("### Draft Response")
    # The draft may be model-translated text; escape it so stray '<', '>' or '&'
    # is shown literally instead of being interpreted as markup.
    safe_response = html.escape(final_response, quote=False)
    st.markdown(f'<div class="result-box">{safe_response}</div>', unsafe_allow_html=True)

    # Clipboard copy functionality.
    # NOTE(review): pyperclip acts on the machine running this script, which is
    # not necessarily the viewer's browser - confirm this suits the deployment.
    if st.button("Copy Response to Clipboard"):
        try:
            pyperclip.copy(final_response)
            st.success("Response copied to clipboard!")
        except Exception as e:
            st.error(f"Could not copy to clipboard: {e}")


def main():
    """
    Main application function for the Streamlit interface.

    Builds the page, runs the analysis when the primary button is pressed and
    stores the outcome in ``st.session_state.analysis_results``. Results are
    rendered from session state on every run, not only inside the
    analyse-button branch: clicking any other widget (such as the copy button)
    re-executes the script with the analyse button reporting False, so anything
    nested inside that branch would disappear before it could respond.
    """
    # Page configuration
    st.set_page_config(
        page_title="Kimberley Review Assistant",
        page_icon="🏨",
        layout="centered"
    )
    # Custom CSS styling
    _inject_styles()

    # Application header
    st.markdown('<div class="header">The Kimberley Hotel Hong Kong</div>', unsafe_allow_html=True)
    st.markdown('<div class="subheader">Guest Review Analysis System</div>', unsafe_allow_html=True)

    # Supported languages display
    st.markdown("**Supported Review Languages:**")
    lang_cols = st.columns(6)
    for i, (code, name) in enumerate(SUPPORTED_LANGUAGES.items()):
        lang_cols[i % 6].markdown(f'<div class="badge">{name}</div>', unsafe_allow_html=True)

    # Language selection dropdown
    review_lang = st.selectbox(
        "Select review language:",
        options=list(SUPPORTED_LANGUAGES.keys()),
        format_func=lambda x: SUPPORTED_LANGUAGES[x],
        index=0
    )

    # Review input with character counter
    review = st.text_area("**Paste Guest Review:**",
                          height=200,
                          max_chars=MAX_CHARS,
                          placeholder=f"Enter review in any supported language (max {MAX_CHARS} characters)...",
                          key="review_input")

    # Character counter logic
    char_count = len(st.session_state.review_input) if 'review_input' in st.session_state else 0
    char_class = "warning" if char_count > MAX_CHARS else ""
    st.markdown(f'<div class="char-counter {char_class}">{char_count}/{MAX_CHARS} characters</div>',
                unsafe_allow_html=True)

    # Main analysis button
    if st.button("Analyze & Generate Response", type="primary"):
        # A new attempt supersedes whatever was shown before, even if it fails
        st.session_state.pop('analysis_results', None)

        if not review.strip():
            st.error("Please enter a review")
            return

        # Enforce character limit
        if char_count > MAX_CHARS:
            st.warning(f"Review truncated to {MAX_CHARS} characters for analysis")
            review = review[:MAX_CHARS]

        with st.spinner("Analyzing feedback..."):
            try:
                results = _analyze_review(review, review_lang)
            except Exception as e:
                st.error(f"An error occurred during analysis: {str(e)}")
                return

        if results is None:
            # Translation failed; the error has already been displayed
            return

        # Store results in session state so they survive the next rerun
        st.session_state.analysis_results = results

    # Display results (fresh or from a previous run)
    stored_results = st.session_state.get('analysis_results')
    if stored_results:
        _render_results(stored_results)
# Entry point: build the UI only when this file is run as the main module,
# so importing it (e.g. to reuse the helpers) has no side effects.
if __name__ == "__main__":
    main()