IS361Group4 committed on
Commit b68a3c9 · verified · 1 Parent(s): d03ac60

Update app.py

Files changed (1)
  1. app.py +118 -137
app.py CHANGED
@@ -1,189 +1,170 @@
 
 
  import os
  import gradio as gr
  import pandas as pd
  import numpy as np
  import joblib
  import spacy
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
  from langchain_core.pydantic_v1 import BaseModel, Field
  from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
  from langchain.output_parsers import PydanticOutputParser
  from langchain_openai import ChatOpenAI

- # --- Translator App ---
  chat = ChatOpenAI()
  class TextTranslator(BaseModel):
-     output: str = Field(description="Python string containing the output text translated in the desired language")

  output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
  format_instructions = output_parser.get_format_instructions()

- def text_translator(input_text : str, language : str) -> str:
-     human_template = """Enter the text that you want to translate:
-     {input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
-     human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
-     chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
-     prompt = chat_prompt.format_prompt(input_text = input_text, language = language, format_instructions = format_instructions)
-     messages = prompt.to_messages()
-     response = chat(messages = messages)
-     output = output_parser.parse(response.content)
-     return output.output
-
- translator_tab = gr.Interface(fn=text_translator,
-                               inputs=[gr.Textbox(label="Text to translate"), gr.Textbox(label="Target Language")],
-                               outputs=[gr.Textbox(label="Translated Text")],
-                               title="Text Translator")
-
- # --- Sentiment Analysis App ---
  sentiment_model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
  def sentiment_analysis(message, history):
      result = sentiment_model(message)
-     return f"Sentiment: {result[0]['label']} (Probability: {result[0]['score']:.2f})"
-
- sentiment_tab = gr.ChatInterface(fn=sentiment_analysis, title="Sentiment Analysis")
-
- # --- Financial Analyst ---
- spacy_model = spacy.load('en_core_web_sm')
- spacy_model.add_pipe('sentencizer')
- auth_token = os.environ.get("HF_Token")
- asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
- summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
- fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')

  def split_in_sentences(text):
-     doc = spacy_model(text)
-     return [str(sent).strip() for sent in doc.sents]

  def make_spans(text, results):
-     return list(zip(split_in_sentences(text), [r["label"] for r in results]))

- def speech_to_text(speech):
-     return asr(speech)["text"]

  def summarize_text(text):
      return summarizer(text)[0]['summary_text']

  def text_to_sentiment(text):
      return fin_model(text)[0]["label"]

  def fin_ext(text):
-     results = fin_model(split_in_sentences(text))
-     return make_spans(text, results)

  def fls(text):
-     fls_model = pipeline("text-classification", model="demo-org/finbert_fls", tokenizer="demo-org/finbert_fls", use_auth_token=auth_token)
-     results = fls_model(split_in_sentences(text))
-     return make_spans(text, results)

- def fin_ner(text):
-     api = gr.Interface.load("dslim/bert-base-NER", src='models', use_auth_token=auth_token)
-     return api(text)
-
- financial_tab = gr.Blocks()
- with financial_tab:
-     gr.Markdown("## Financial Analyst AI")
-     audio_file = gr.Audio(source="microphone", type="filepath")
-     text = gr.Textbox(label="Recognized Text")
-     summary = gr.Textbox(label="Summary")
-     tone = gr.Label(label="Financial Tone")
-     spans = gr.HighlightedText()
-     fls_spans = gr.HighlightedText()
-     ner_spans = gr.HighlightedText()
-     with gr.Row():
-         gr.Button("Recognize Speech").click(speech_to_text, inputs=audio_file, outputs=text)
-         gr.Button("Summarize Text").click(summarize_text, inputs=text, outputs=summary)
-         gr.Button("Classify Tone").click(text_to_sentiment, inputs=summary, outputs=tone)
-     with gr.Row():
-         gr.Button("Financial Sentiment").click(fin_ext, inputs=text, outputs=spans)
-         gr.Button("Forward Looking").click(fls, inputs=text, outputs=fls_spans)
-         gr.Button("NER Companies").click(fin_ner, inputs=text, outputs=ner_spans)
-
- # --- Personal Information Detection ---
- pii_tab = gr.load("models/iiiorg/piiranha-v1-detect-personal-information")
-
- # --- Customer Churn ---
  script_dir = os.path.dirname(os.path.abspath(__file__))
- pipeline = joblib.load(os.path.join(script_dir, 'toolkit', 'pipeline.joblib'))
- model = joblib.load(os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib'))

  def calculate_total_charges(tenure, monthly_charges):
      return tenure * monthly_charges

- def predict_churn(SeniorCitizen, Partner, Dependents, tenure,
                    InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
                    StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
                    MonthlyCharges):
      TotalCharges = calculate_total_charges(tenure, MonthlyCharges)
      input_df = pd.DataFrame({
-         'SeniorCitizen': [SeniorCitizen],
-         'Partner': [Partner],
-         'Dependents': [Dependents],
-         'tenure': [tenure],
-         'InternetService': [InternetService],
-         'OnlineSecurity': [OnlineSecurity],
-         'OnlineBackup': [OnlineBackup],
-         'DeviceProtection': [DeviceProtection],
-         'TechSupport': [TechSupport],
-         'StreamingTV': [StreamingTV],
-         'StreamingMovies': [StreamingMovies],
-         'Contract': [Contract],
-         'PaperlessBilling': [PaperlessBilling],
-         'PaymentMethod': [PaymentMethod],
-         'MonthlyCharges': [MonthlyCharges],
-         'TotalCharges': [TotalCharges]
      })
-     X_processed = pipeline.transform(input_df)
-     cat_encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
-     cat_cols = [col for col in input_df.columns if input_df[col].dtype == 'object']
-     feature_names = [col for col in input_df.columns if input_df[col].dtype != 'object'] + list(cat_encoder.get_feature_names_out(cat_cols))
      final_df = pd.DataFrame(X_processed, columns=feature_names)
-     final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)
-     prediction_probs = model.predict_proba(final_df)[0]
      return {
-         "Prediction: CHURN 🔴": prediction_probs[1],
-         "Prediction: STAY ✅": prediction_probs[0]
      }

- churn_tab = gr.Interface(
-     fn=predict_churn,
-     inputs=[
-         gr.Radio(['Yes', 'No'], label="Senior Citizen"),
-         gr.Radio(['Yes', 'No'], label="Partner"),
-         gr.Radio(['No', 'Yes'], label="Dependents"),
-         gr.Slider(1, 73, step=1, label="Tenure (months)"),
-         gr.Radio(['DSL', 'Fiber optic', 'No Internet'], label="Internet Service"),
-         gr.Radio(['No', 'Yes'], label="Online Security"),
-         gr.Radio(['No', 'Yes'], label="Online Backup"),
-         gr.Radio(['No', 'Yes'], label="Device Protection"),
-         gr.Radio(['No', 'Yes'], label="Tech Support"),
-         gr.Radio(['No', 'Yes'], label="Streaming TV"),
-         gr.Radio(['No', 'Yes'], label="Streaming Movies"),
-         gr.Radio(['Month-to-month', 'One year', 'Two year'], label="Contract"),
-         gr.Radio(['Yes', 'No'], label="Paperless Billing"),
-         gr.Radio(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], label="Payment Method"),
-         gr.Slider(18.4, 118.65, label="Monthly Charges")
-     ],
-     outputs=gr.Label(label="Prediction"),
-     title="Customer Churn Prediction"
- )
-
- # --- Launching All Tabs ---
- demo = gr.TabbedInterface(
-     interface_list=[
-         translator_tab,
-         sentiment_tab,
-         financial_tab,
-         pii_tab,
-         churn_tab
-     ],
-     tab_names=[
-         "Translator",
-         "Sentiment Analysis",
-         "Financial Analyst",
-         "Personal Info Detection",
-         "Customer Churn"
-     ]
- )
-
- if __name__ == '__main__':
      demo.launch()
 
+ # app.py
+
  import os
  import gradio as gr
  import pandas as pd
  import numpy as np
  import joblib
  import spacy
+
  from langchain_core.pydantic_v1 import BaseModel, Field
  from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
  from langchain.output_parsers import PydanticOutputParser
  from langchain_openai import ChatOpenAI
+ from transformers import pipeline

+ ### 1. Translator ###
  chat = ChatOpenAI()
+
  class TextTranslator(BaseModel):
+     output: str = Field(description="Translated output")

  output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
  format_instructions = output_parser.get_format_instructions()

+ def text_translator(input_text: str, language: str) -> str:
+     template = """Enter the text that you want to translate:
+     {input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
+     human_prompt = HumanMessagePromptTemplate.from_template(template)
+     prompt = ChatPromptTemplate.from_messages([human_prompt]).format_prompt(
+         input_text=input_text, language=language, format_instructions=format_instructions)
+     response = chat(messages=prompt.to_messages())
+     return output_parser.parse(response.content).output
+
+ ### 2. Sentiment ###
  sentiment_model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
  def sentiment_analysis(message, history):
      result = sentiment_model(message)
+     return f"Sentiment: {result[0]['label']} (Probability: {result[0]['score']:.2f})"

+ ### 3. Financial Analyst ###
+ nlp = spacy.load('en_core_web_sm')
+ nlp.add_pipe('sentencizer')
  def split_in_sentences(text):
+     return [str(sent).strip() for sent in nlp(text).sents]

  def make_spans(text, results):
+     labels = [r['label'] for r in results]
+     return list(zip(split_in_sentences(text), labels))

+ auth_token = os.environ.get("HF_Token")

+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+ def speech_to_text(audio):
+     return asr(audio)["text"]
+
+ summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
  def summarize_text(text):
      return summarizer(text)[0]['summary_text']

+ fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone')
  def text_to_sentiment(text):
      return fin_model(text)[0]["label"]

+ def fin_ner(text):
+     return gr.Interface.load("dslim/bert-base-NER", src='models', use_auth_token=auth_token)(text)
+
  def fin_ext(text):
+     return make_spans(text, fin_model(split_in_sentences(text)))

  def fls(text):
+     model = pipeline("text-classification", model="demo-org/finbert_fls", tokenizer="demo-org/finbert_fls", use_auth_token=auth_token)
+     return make_spans(text, model(split_in_sentences(text)))

+ ### 4. Personal Info Detection ###
+ def detect_personal_info(text):
+     model = gr.Interface.load("iiiorg/piiranha-v1-detect-personal-information")
+     return model(text)
+
+ ### 5. Customer Churn ###
  script_dir = os.path.dirname(os.path.abspath(__file__))
+ pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib')
+ model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib')
+ pipeline_model = joblib.load(pipeline_path)
+ model = joblib.load(model_path)

  def calculate_total_charges(tenure, monthly_charges):
      return tenure * monthly_charges

+ def predict(SeniorCitizen, Partner, Dependents, tenure,
              InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
              StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
              MonthlyCharges):
+
      TotalCharges = calculate_total_charges(tenure, MonthlyCharges)
      input_df = pd.DataFrame({
+         'SeniorCitizen': [SeniorCitizen], 'Partner': [Partner], 'Dependents': [Dependents],
+         'tenure': [tenure], 'InternetService': [InternetService], 'OnlineSecurity': [OnlineSecurity],
+         'OnlineBackup': [OnlineBackup], 'DeviceProtection': [DeviceProtection], 'TechSupport': [TechSupport],
+         'StreamingTV': [StreamingTV], 'StreamingMovies': [StreamingMovies], 'Contract': [Contract],
+         'PaperlessBilling': [PaperlessBilling], 'PaymentMethod': [PaymentMethod],
+         'MonthlyCharges': [MonthlyCharges], 'TotalCharges': [TotalCharges]
      })
+
+     X_processed = pipeline_model.transform(input_df)
+     cat_encoder = pipeline_model.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
+     feature_names = [*input_df.select_dtypes(exclude='object').columns, *cat_encoder.get_feature_names_out()]
      final_df = pd.DataFrame(X_processed, columns=feature_names)
+     pred_probs = model.predict_proba(final_df)[0]
      return {
+         "Prediction: CHURN 🔴": pred_probs[1],
+         "Prediction: STAY ✅": pred_probs[0]
      }

+ ### COMBINED UI ###
+ with gr.Blocks() as demo:
+     with gr.Tab("Translator"):
+         gr.Markdown("## Translator")
+         input_text = gr.Textbox(label="Text to Translate")
+         language = gr.Textbox(label="Target Language")
+         output = gr.Textbox(label="Translated Text")
+         gr.Button("Translate").click(text_translator, inputs=[input_text, language], outputs=output)
+
+     with gr.Tab("Sentiment"):
+         gr.Markdown("## Sentiment Analysis")
+         gr.ChatInterface(sentiment_analysis, type="messages")
+
+     with gr.Tab("Financial Analyst"):
+         gr.Markdown("## Financial Analyst")
+         audio = gr.Audio(source="microphone", type="filepath")
+         text_input = gr.Textbox()
+         summary = gr.Textbox()
+         tone_label = gr.Label()
+         gr.Button("Speech to Text").click(speech_to_text, inputs=audio, outputs=text_input)
+         gr.Button("Summarize").click(summarize_text, inputs=text_input, outputs=summary)
+         gr.Button("Classify Tone").click(text_to_sentiment, inputs=summary, outputs=tone_label)
+         gr.HighlightedText(label="Tone").render()
+         gr.HighlightedText(label="Forward-Looking").render()
+         gr.Button("Analyze All").click(fn=fin_ext, inputs=text_input, outputs=None).click(fls, inputs=text_input, outputs=None)
+         gr.Button("Entities").click(fin_ner, inputs=text_input, outputs=None)
+
+     with gr.Tab("Personal Info Detector"):
+         gr.Markdown("## Detect Personal Info")
+         pi_input = gr.Textbox()
+         pi_output = gr.HighlightedText()
+         gr.Button("Detect").click(detect_personal_info, inputs=pi_input, outputs=pi_output)
+
+     with gr.Tab("Customer Churn"):
+         gr.Markdown("## Customer Churn Prediction")
+         inputs = [
+             gr.Radio(["Yes", "No"], label="SeniorCitizen"),
+             gr.Radio(["Yes", "No"], label="Partner"),
+             gr.Radio(["No", "Yes"], label="Dependents"),
+             gr.Slider(1, 73, step=1, label="Tenure"),
+             gr.Radio(["DSL", "Fiber optic", "No Internet"], label="InternetService"),
+             gr.Radio(["No", "Yes"], label="OnlineSecurity"),
+             gr.Radio(["No", "Yes"], label="OnlineBackup"),
+             gr.Radio(["No", "Yes"], label="DeviceProtection"),
+             gr.Radio(["No", "Yes"], label="TechSupport"),
+             gr.Radio(["No", "Yes"], label="StreamingTV"),
+             gr.Radio(["No", "Yes"], label="StreamingMovies"),
+             gr.Radio(["Month-to-month", "One year", "Two year"], label="Contract"),
+             gr.Radio(["Yes", "No"], label="PaperlessBilling"),
+             gr.Radio(["Electronic check", "Mailed check", "Bank transfer (automatic)", "Credit card (automatic)"], label="PaymentMethod"),
+             gr.Slider(18.40, 118.65, label="MonthlyCharges")
+         ]
+         churn_output = gr.Label()
+         gr.Button("Predict").click(predict, inputs=inputs, outputs=churn_output)
+
+ if __name__ == "__main__":
      demo.launch()