IS361Group4 committed on
Commit
82c5ddf
·
verified ·
1 Parent(s): c505d11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +279 -41
app.py CHANGED
@@ -9,24 +9,100 @@ from langchain_openai import ChatOpenAI
9
  from langchain_core.output_parsers import StrOutputParser
10
 
11
  # 1. Translator
12
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
 
 
 
 
13
 
14
- def translate_text(text):
15
- return translator(text)[0]['translation_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # 2. Sentiment Analysis
18
- sentiment = pipeline("sentiment-analysis")
19
 
20
- def analyze_sentiment(text):
21
- return sentiment(text)[0]
 
 
 
 
 
22
 
23
  # 3. Financial Analyst (LangChain with OpenAI, requires API key)
24
- def financial_analysis(text, api_key):
25
- chat = ChatOpenAI(api_key=api_key)
26
- template = "Analyze the financial context of this text:\n\n{text}"
27
- prompt = PromptTemplate.from_template(template)
28
- chain = LLMChain(llm=chat, prompt=prompt, output_parser=StrOutputParser())
29
- return chain.run({"text": text})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # 4. Personal Info Detection
32
  def detect_pii(text):
@@ -43,33 +119,144 @@ def detect_pii(text):
43
  return found or "No personal information found."
44
 
45
  # 5. Telco Customer Churn Prediction
46
- model = joblib.load("model.joblib")
47
- def churn_prediction(gender, SeniorCitizen, Partner, tenure, MonthlyCharges):
48
- input_df = pd.DataFrame([[gender, SeniorCitizen, Partner, tenure, MonthlyCharges]],
49
- columns=["gender", "SeniorCitizen", "Partner", "tenure", "MonthlyCharges"])
50
- prediction = model.predict(input_df)[0]
51
- return "Churn" if prediction == 1 else "Not Churn"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Gradio UI setup
54
  with gr.Blocks() as demo:
55
  with gr.Tab("Translator"):
56
- input_text = gr.Textbox(label="Input Text")
57
- output_text = gr.Textbox(label="Translated Text")
58
- translate_button = gr.Button("Translate")
59
- translate_button.click(fn=translate_text, inputs=input_text, outputs=output_text)
 
 
 
60
 
61
  with gr.Tab("Sentiment Analysis"):
62
- sentiment_input = gr.Textbox(label="Text")
63
- sentiment_output = gr.Textbox(label="Sentiment")
64
- sentiment_button = gr.Button("Analyze")
65
- sentiment_button.click(fn=analyze_sentiment, inputs=sentiment_input, outputs=sentiment_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  with gr.Tab("Financial Analyst"):
68
- finance_input = gr.Textbox(label="Financial Text")
69
- api_key_input = gr.Textbox(label="OpenAI API Key", type="password")
70
- finance_output = gr.Textbox(label="Analysis")
71
- finance_button = gr.Button("Analyze")
72
- finance_button.click(fn=financial_analysis, inputs=[finance_input, api_key_input], outputs=finance_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  with gr.Tab("PII Detector"):
75
  pii_input = gr.Textbox(label="Text")
@@ -78,15 +265,66 @@ with gr.Blocks() as demo:
78
  pii_button.click(fn=detect_pii, inputs=pii_input, outputs=pii_output)
79
 
80
  with gr.Tab("Telco Churn Predictor"):
81
- gender = gr.Dropdown(choices=["Male", "Female"], label="Gender")
82
- senior = gr.Dropdown(choices=[0, 1], label="Senior Citizen")
83
- partner = gr.Dropdown(choices=["Yes", "No"], label="Partner")
84
- tenure = gr.Number(label="Tenure (months)")
85
- charges = gr.Number(label="Monthly Charges")
86
- churn_output = gr.Textbox(label="Prediction")
87
- churn_button = gr.Button("Predict")
88
- churn_button.click(fn=churn_prediction,
89
- inputs=[gender, senior, partner, tenure, charges],
90
- outputs=churn_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  demo.launch()
 
9
  from langchain_core.output_parsers import StrOutputParser
10
 
11
  # 1. Translator
12
# Structured reply expected from the chat model used by the translator.
# NOTE(review): documented with `#` comments instead of a class docstring,
# presumably a docstring would leak into the schema pydantic generates and
# thereby into `format_instructions` -- confirm before adding one.
class TextTranslator(BaseModel):
    # Single field holding the translated text.
    output: str = Field(
        description="Python string containing the output text translated in the desired language",
    )


# Parser that converts the model's raw reply into a TextTranslator instance.
output_parser = PydanticOutputParser(pydantic_object=TextTranslator)

# Instruction text generated by the parser; text_translator injects it into
# the `{format_instructions}` placeholder of its prompt.
format_instructions = output_parser.get_format_instructions()
17
 
18
def text_translator(input_text: str, language: str) -> str:
    """Translate ``input_text`` into ``language`` with an OpenAI chat model.

    The prompt asks the model to answer in the structure described by the
    module-level ``format_instructions``; the reply is parsed with
    ``output_parser`` and only the translated string is returned.

    Args:
        input_text: Text to translate.
        language: Name of the target language, as typed by the user.

    Returns:
        The ``output`` field of the parsed ``TextTranslator`` reply.
    """
    human_template = """Enter the text that you want to translate:
{input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""

    chat_prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(human_template)]
    )
    prompt = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    )

    # Do not rely on the global name `chat`: the UI section of this file
    # rebinds it to a gr.ChatInterface, so by the time this handler runs it
    # is no longer a language model. Use a dedicated model instance instead.
    # NOTE(review): ChatOpenAI() is built with library defaults and is
    # expected to pick its API key up from the environment -- confirm the
    # deployment provides one.
    llm = ChatOpenAI()
    response = llm.invoke(prompt.to_messages())

    parsed = output_parser.parse(response.content)
    return parsed.output
36
 
37
  # 2. Sentiment Analysis
38
# Sentiment classifier used by the chat handler below.
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")


def sentiment_analysis(message, history):
    """Analyze the sentiment of a message.

    ``history`` is accepted so the function can be used as a chat handler
    but is not read. Returns the sentiment label of the first classifier
    result together with its score, formatted to two decimals.
    """
    top = classifier(message)[0]
    label = top['label']
    score = top['score']
    return f"Sentimiento : {label} (Probabilidad: {score:.2f})"
47
 
48
  # 3. Financial Analyst (speech recognition, summarization, FinBERT tone / forward-looking-statement analysis, NER)
49
# spaCy pipeline used only for sentence segmentation in this section.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')


def split_in_sentences(text):
    """Return the sentences of ``text`` as a list of stripped strings."""
    sentences = []
    for sent in nlp(text).sents:
        sentences.append(str(sent).strip())
    return sentences
55
+
56
def make_spans(text, results):
    """Pair each sentence of ``text`` with the label predicted for it.

    Args:
        text: The text that was classified sentence by sentence.
        results: Sequence of classifier outputs, one dict per sentence,
            each carrying a ``'label'`` key.

    Returns:
        A list of ``(sentence, label)`` tuples. ``zip`` stops at the
        shorter of the two sequences, as the original implementation did.
    """
    labels = [result['label'] for result in results]
    return list(zip(split_in_sentences(text), labels))
63
+
64
+ auth_token = os.environ.get("HF_Token")
65
+
66
+ ##Speech Recognition
67
# Speech-recognition model shared by both helpers below.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")


def transcribe(audio):
    """Run speech recognition on ``audio`` and return the recognized text."""
    return asr(audio)["text"]


def speech_to_text(speech):
    """Return the recognized text for ``speech``; same behavior as ``transcribe``."""
    return transcribe(speech)
74
+
75
+ ##Summarization
76
# Summarization model used by the "Summarize Text" action.
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")


def summarize_text(text):
    """Return the ``summary_text`` of the first summarizer result for ``text``."""
    first_result = summarizer(text)[0]
    return first_result['summary_text']
81
+
82
+ ##Fiscal Tone Analysis
83
# FinBERT tone classifier, used for whole-text and per-sentence tone.
fin_model = pipeline(
    "sentiment-analysis",
    model='yiyanghkust/finbert-tone',
    tokenizer='yiyanghkust/finbert-tone',
)


def text_to_sentiment(text):
    """Return the tone label of the first ``fin_model`` result for ``text``."""
    top = fin_model(text)[0]
    return top["label"]
87
+
88
+ ##Company Extraction
89
# Lazily created handle to the hosted NER model; built on first use so the
# remote model is not reloaded on every button click.
_ner_api = None


def fin_ner(text):
    """Run named-entity recognition on ``text`` via the hosted NER model.

    The hosted ``dslim/bert-base-NER`` model is loaded once through
    ``gr.load`` (the replacement for the removed ``gr.Interface.load``) and
    reused for later calls.

    Args:
        text: The text to tag.

    Returns:
        Whatever the loaded model interface returns for ``text``; it is fed
        directly into a ``gr.HighlightedText`` output by the UI.
    """
    global _ner_api
    if _ner_api is None:
        _ner_api = gr.load("dslim/bert-base-NER", src='models', hf_token=auth_token)
    return _ner_api(text)
93
+
94
+ ##Fiscal Sentiment by Sentence
95
def fin_ext(text):
    """Classify the tone of each sentence of ``text``.

    Returns the ``(sentence, label)`` pairs built by ``make_spans`` from the
    per-sentence ``fin_model`` results.
    """
    sentences = split_in_sentences(text)
    return make_spans(text, fin_model(sentences))
98
+
99
+ ##Forward Looking Statement
100
# Keep a private reference to the pipeline factory as it is bound *here*.
# Further down this file the module-level name `pipeline` is reassigned to a
# joblib-loaded preprocessing object, so looking `pipeline` up at call time
# would no longer reach the model factory.
_hf_pipeline = pipeline

# Forward-looking-statement classifier, created on first use and then reused.
_fls_model = None


def fls(text):
    """Classify each sentence of ``text`` as forward-looking or not.

    The classifier is built once, on the first call, instead of on every
    call. An alternative checkpoint considered by the original author is
    ``yiyanghkust/finbert-fls``.

    Args:
        text: The text to analyze.

    Returns:
        The ``(sentence, label)`` pairs produced by ``make_spans``.
    """
    global _fls_model
    if _fls_model is None:
        _fls_model = _hf_pipeline(
            "text-classification",
            model="demo-org/finbert_fls",
            tokenizer="demo-org/finbert_fls",
            use_auth_token=auth_token,
        )
    results = _fls_model(split_in_sentences(text))
    return make_spans(text, results)
105
+
106
 
107
  # 4. Personal Info Detection
108
  def detect_pii(text):
 
119
  return found or "No personal information found."
120
 
121
  # 5. Telco Customer Churn Prediction
122
# Locate the serialized artefacts relative to this file.
script_dir = os.path.dirname(os.path.abspath(__file__))
_toolkit_dir = os.path.join(script_dir, 'toolkit')
pipeline_path = os.path.join(_toolkit_dir, 'pipeline.joblib')
model_path = os.path.join(_toolkit_dir, 'Random Forest Classifier.joblib')

# Load the transformation pipeline and the classifier.
# NOTE(review): this rebinds the module-level name `pipeline`, which the
# sections above call as a model factory (e.g. pipeline("sentiment-analysis",
# ...)). Any function that looks `pipeline` up after this point gets the
# joblib-loaded object instead.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)
129
+
130
+ # Create a function to calculate TotalCharges
131
def calculate_total_charges(tenure, monthly_charges):
    """Return ``tenure`` multiplied by ``monthly_charges``."""
    total_charges = tenure * monthly_charges
    return total_charges
133
+
134
+ # Create a function that applies the ML pipeline and makes predictions
135
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Apply the preprocessing pipeline and model to one customer record.

    The arguments are the raw form values, in the order the UI supplies
    them. ``TotalCharges`` is derived from ``tenure`` and ``MonthlyCharges``.

    Returns:
        A dict mapping the two display labels (churn / stay) to the
        corresponding entries of ``model.predict_proba`` for this record.
    """
    # Single-row frame; column order is the insertion order of this dict.
    record = {
        'SeniorCitizen': SeniorCitizen,
        'Partner': Partner,
        'Dependents': Dependents,
        'tenure': tenure,
        'InternetService': InternetService,
        'OnlineSecurity': OnlineSecurity,
        'OnlineBackup': OnlineBackup,
        'DeviceProtection': DeviceProtection,
        'TechSupport': TechSupport,
        'StreamingTV': StreamingTV,
        'StreamingMovies': StreamingMovies,
        'Contract': Contract,
        'PaperlessBilling': PaperlessBilling,
        'PaymentMethod': PaymentMethod,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': calculate_total_charges(tenure, MonthlyCharges),
    }
    input_df = pd.DataFrame({column: [value] for column, value in record.items()})

    # Split column names by dtype: object columns are treated as categorical,
    # everything else as numerical.
    cat_cols = []
    num_cols = []
    for col in input_df.columns:
        if input_df[col].dtype == 'object':
            cat_cols.append(col)
        else:
            num_cols.append(col)

    X_processed = pipeline.transform(input_df)

    # Recover the one-hot output names so the transformed matrix can be
    # labelled: numerical names first, then the encoded categorical names.
    onehot = (
        pipeline.named_steps['preprocessor']
        .named_transformers_['cat']
        .named_steps['onehot']
    )
    feature_names = num_cols + list(onehot.get_feature_names_out(cat_cols))
    labelled = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably this matches the column order the model was
    # trained on -- confirm against the training code.
    final_df = pd.concat([labelled.iloc[:, 3:], labelled.iloc[:, :3]], axis=1)

    probs = model.predict_proba(final_df)[0]
    return {
        "Prediction: CHURN 🔴": probs[1],
        "Prediction: STAY ✅": probs[0],
    }
192
+
193
+ input_interface = []
194
 
195
  # Gradio UI setup
196
  with gr.Blocks() as demo:
197
  with gr.Tab("Translator"):
198
# Translator tab: heading, two text inputs, a button and the output box.
gr.HTML("<h1 align = 'center'> Text Translator </h1>")
gr.HTML("<h4 align = 'center'> Translate to any language </h4>")

inputs = [
    gr.Textbox(label="Enter the text that you want to translate"),
    gr.Textbox(
        label="Enter the language that you want it to translate to",
        placeholder="Example : Hindi,French,Bengali,etc",
    ),
]
generate_btn = gr.Button(value='Generate')
outputs = [gr.Textbox(label="Translated text")]

# The two inputs map positionally onto text_translator(input_text, language).
generate_btn.click(fn=text_translator, inputs=inputs, outputs=outputs)
205
 
206
  with gr.Tab("Sentiment Analysis"):
207
# Sentiment tab: intro text, the chat widget, then credits.
gr.Markdown("""
# Análisis de Sentimientos
Esta aplicación utiliza un modelo de Machine Learning para analizar el sentimiento de los mensajes ingresados.
Puede detectar si un texto es positivo, negativo o neutral con su respectiva probabilidad.
""")

# Bound to its own name rather than `chat`: text_translator looks up the
# global `chat` expecting a language model, and assigning the chat widget to
# that name would silently replace it.
sentiment_chat = gr.ChatInterface(sentiment_analysis, type="messages")

gr.Markdown("""
---
### Conéctate conmigo:
[Instagram 📸](https://www.instagram.com/srjosueaaron/)

[TikTok 🎵](https://www.tiktok.com/@srjosueaaron)

[YouTube 🎬](https://www.youtube.com/@srjosueaaron)
---
Demostración de Análisis de Sentimientos usando el modelo de [CardiffNLP](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment).

Desarrollado con ❤️ por [@srjosueaaron](https://www.instagram.com/srjosueaaron/).
""")
228
 
229
  with gr.Tab("Financial Analyst"):
230
# Financial Analyst tab.
# Left column: microphone -> transcript -> summary -> tone label.
# Right column: per-sentence tone / forward-looking spans and entity spans.
gr.Markdown("## Financial Analyst AI")
gr.Markdown("This project applies AI trained by our financial analysts to analyze earning calls and other financial documents.")
with gr.Row():
    with gr.Column():
        # gr.inputs.Audio(source=...) no longer exists in the Gradio versions
        # that accept ChatInterface(type="messages"), which this file uses;
        # the component is gr.Audio and takes a list of sources.
        audio_file = gr.Audio(sources=["microphone"], type="filepath")
        with gr.Row():
            b1 = gr.Button("Recognize Speech")
        with gr.Row():
            text = gr.Textbox(value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month. The department expects inflation to continue to rise.")
            b1.click(speech_to_text, inputs=audio_file, outputs=text)
        with gr.Row():
            b2 = gr.Button("Summarize Text")
            stext = gr.Textbox()
            b2.click(summarize_text, inputs=text, outputs=stext)
        with gr.Row():
            b3 = gr.Button("Classify Financial Tone")
            label = gr.Label()
            # Tone is classified on the summary, not on the full text.
            b3.click(text_to_sentiment, inputs=stext, outputs=label)
    with gr.Column():
        b5 = gr.Button("Financial Tone and Forward Looking Statement Analysis")
        with gr.Row():
            fin_spans = gr.HighlightedText()
            b5.click(fin_ext, inputs=text, outputs=fin_spans)
        with gr.Row():
            fls_spans = gr.HighlightedText()
            # Same button, second handler: fills the forward-looking spans.
            b5.click(fls, inputs=text, outputs=fls_spans)
        with gr.Row():
            b4 = gr.Button("Identify Companies & Locations")
            replaced_spans = gr.HighlightedText()
            b4.click(fin_ner, inputs=text, outputs=replaced_spans)
260
 
261
  with gr.Tab("PII Detector"):
262
  pii_input = gr.Textbox(label="Text")
 
265
  pii_button.click(fn=detect_pii, inputs=pii_input, outputs=pii_output)
266
 
267
  with gr.Tab("Telco Churn Predictor"):
268
# Telco churn tab: title, description, two columns of inputs, predict button
# with its output label, and a collapsible description of the inputs.
with gr.Row():
    # Create the label inside the row. Creating it beforehand and then
    # writing the bare name `Title` inside the row is a no-op expression
    # that leaves the row empty.
    Title = gr.Label('Customer Churn Prediction App')

with gr.Row():
    gr.Markdown("This app predicts likelihood of a customer to leave or stay with the company")

with gr.Row():
    with gr.Column():
        input_interface_column_1 = [
            gr.components.Radio(['Yes', 'No'], label="Are you a Seniorcitizen?"),
            gr.components.Radio(['Yes', 'No'], label='Do you have Partner?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have any Dependents?'),
            gr.components.Slider(label='Enter lenghth of Tenure in Months', minimum=1, maximum=73, step=1),
            gr.components.Radio(['DSL', 'Fiber optic', 'No Internet'], label='What is your Internet Service?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have Online Security?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have Online Backup?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have Device Protection?'),
        ]

    with gr.Column():
        input_interface_column_2 = [
            gr.components.Radio(['No', 'Yes'], label='Do you have Tech Support?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have Streaming TV?'),
            gr.components.Radio(['No', 'Yes'], label='Do you have Streaming Movies?'),
            gr.components.Radio(['Month-to-month', 'One year', 'Two year'], label='What is your Contract Type?'),
            gr.components.Radio(['Yes', 'No'], label='Do you prefer Paperless Billing?'),
            gr.components.Radio(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], label='Which PaymentMethod do you prefer?'),
            gr.components.Slider(label="Enter monthly charges", minimum=18.40, maximum=118.65),
        ]

# Collect the components in the positional order of predict's parameters.
# This is plain list bookkeeping, so it needs no layout row around it.
input_interface.extend(input_interface_column_1)
input_interface.extend(input_interface_column_2)

with gr.Row():
    predict_btn = gr.Button('Predict')
    output_interface = gr.Label(label="churn")

with gr.Accordion("Open for information on inputs", open=False):
    gr.Markdown("""This app receives the following as inputs and processes them to return the prediction on whether a customer, will churn or not.

- SeniorCitizen: Whether a customer is a senior citizen or not
- Partner: Whether the customer has a partner or not (Yes, No)
- Dependents: Whether the customer has dependents or not (Yes, No)
- Tenure: Number of months the customer has stayed with the company
- InternetService: Customer's internet service provider (DSL, Fiber Optic, No)
- OnlineSecurity: Whether the customer has online security or not (Yes, No, No Internet)
- OnlineBackup: Whether the customer has online backup or not (Yes, No, No Internet)
- DeviceProtection: Whether the customer has device protection or not (Yes, No, No internet service)
- TechSupport: Whether the customer has tech support or not (Yes, No, No internet)
- StreamingTV: Whether the customer has streaming TV or not (Yes, No, No internet service)
- StreamingMovies: Whether the customer has streaming movies or not (Yes, No, No Internet service)
- Contract: The contract term of the customer (Month-to-Month, One year, Two year)
- PaperlessBilling: Whether the customer has paperless billing or not (Yes, No)
- Payment Method: The customer's payment method (Electronic check, mailed check, Bank transfer(automatic), Credit card(automatic))
- MonthlyCharges: The amount charged to the customer monthly
""")

predict_btn.click(fn=predict, inputs=input_interface, outputs=output_interface)
329
 
330
  demo.launch()