Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,24 +9,100 @@ from langchain_openai import ChatOpenAI
|
|
9 |
from langchain_core.output_parsers import StrOutputParser
|
10 |
|
11 |
# 1. Translator
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
def
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# 2. Sentiment Analysis
|
18 |
-
|
19 |
|
20 |
-
def
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# 3. Financial Analyst (LangChain with OpenAI, requires API key)
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# 4. Personal Info Detection
|
32 |
def detect_pii(text):
|
@@ -43,33 +119,144 @@ def detect_pii(text):
|
|
43 |
return found or "No personal information found."
|
44 |
|
45 |
# 5. Telco Customer Churn Prediction
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Gradio UI setup
|
54 |
with gr.Blocks() as demo:
|
55 |
with gr.Tab("Translator"):
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
60 |
|
61 |
with gr.Tab("Sentiment Analysis"):
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
with gr.Tab("Financial Analyst"):
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
with gr.Tab("PII Detector"):
|
75 |
pii_input = gr.Textbox(label="Text")
|
@@ -78,15 +265,66 @@ with gr.Blocks() as demo:
|
|
78 |
pii_button.click(fn=detect_pii, inputs=pii_input, outputs=pii_output)
|
79 |
|
80 |
with gr.Tab("Telco Churn Predictor"):
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
demo.launch()
|
|
|
9 |
from langchain_core.output_parsers import StrOutputParser
|
10 |
|
11 |
# 1. Translator
|
12 |
+
class TextTranslator(BaseModel):
    # Structured-output schema given to PydanticOutputParser: one string field.
    # Documented with `#` comments rather than a class docstring so the model's
    # generated schema (and therefore the format instructions) stays unchanged.
    output: str = Field(
        description="Python string containing the output text translated in the desired language"
    )
|
14 |
+
|
15 |
+
# Parser that turns the model's reply into a TextTranslator instance;
# text_translator() calls its .parse() on the response content.
output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
|
16 |
+
# Computed once at import; text_translator() substitutes it into the
# {format_instructions} slot of its prompt template.
format_instructions = output_parser.get_format_instructions()
|
17 |
|
18 |
+
def text_translator(input_text : str, language : str) -> str:
    """Translate *input_text* into *language* using the module-level chat model.

    Args:
        input_text: Text to translate.
        language: Target language name, inserted verbatim into the prompt.

    Returns:
        The ``output`` field of the ``TextTranslator`` object parsed from the
        model's reply.

    Raises:
        Whatever ``output_parser.parse`` raises when the reply does not match
        the requested format; nothing is caught here.

    Note:
        Reads the module-level names ``chat``, ``output_parser`` and
        ``format_instructions``. ``chat`` must still refer to the LangChain
        chat model when this runs, so no later module-level assignment may
        rebind that name.
    """
    human_template = """Enter the text that you want to translate:
{input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""

    chat_prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(human_template)]
    )
    messages = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    ).to_messages()

    # `.invoke` is the Runnable entry point; calling the model object directly
    # (`chat(messages=...)`) is the deprecated form in langchain_core.
    response = chat.invoke(messages)

    return output_parser.parse(response.content).output
|
36 |
|
37 |
# 2. Sentiment Analysis
|
38 |
+
# Sentiment pipeline built once at import and reused by sentiment_analysis().
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
|
39 |
|
40 |
+
def sentiment_analysis(message, history):
    """Analyze the sentiment of a message.

    Args:
        message: Text passed straight to the module-level ``classifier``.
        history: Accepted as a second argument but not used here.

    Returns:
        A string (in Spanish) with the first result's label and its score
        formatted to two decimals.
    """
    top = classifier(message)[0]
    label, score = top['label'], top['score']
    return f"Sentimiento : {label} (Probabilidad: {score:.2f})"
|
47 |
|
48 |
# 3. Financial Analyst (LangChain with OpenAI, requires API key)
|
49 |
+
# spaCy pipeline used by split_in_sentences() for sentence segmentation.
nlp = spacy.load('en_core_web_sm')
|
50 |
+
# Appends spaCy's 'sentencizer' component to the loaded pipeline.
# NOTE(review): en_core_web_sm may already set sentence boundaries itself —
# confirm this extra component is needed.
nlp.add_pipe('sentencizer')
|
51 |
+
|
52 |
+
def split_in_sentences(text):
    """Return the sentences of *text* as a list of whitespace-stripped strings.

    Segmentation is whatever the module-level spaCy pipeline ``nlp`` exposes
    through ``doc.sents``.
    """
    sentences = []
    for span in nlp(text).sents:
        sentences.append(str(span).strip())
    return sentences
|
55 |
+
|
56 |
+
def make_spans(text, results):
    """Pair each sentence of *text* with the label predicted for it.

    Args:
        text: The original text; it is re-split with ``split_in_sentences``.
        results: Sequence of mappings that each carry a ``'label'`` key, one
            per sentence, in sentence order.

    Returns:
        A list of ``(sentence, label)`` tuples. ``zip`` stops at the shorter
        input, so surplus sentences or results are dropped silently.
    """
    labels = [result['label'] for result in results]
    return list(zip(split_in_sentences(text), labels))
|
63 |
+
|
64 |
+
# Read from the HF_Token environment variable (None when unset);
# passed as the auth token by fin_ner() and fls().
auth_token = os.environ.get("HF_Token")
|
65 |
+
|
66 |
+
##Speech Recognition
|
67 |
+
# Speech-recognition pipeline built once at import; shared by transcribe()
# and speech_to_text().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
68 |
+
def transcribe(audio):
    """Run the module-level ``asr`` pipeline on *audio* and return its ``"text"`` entry."""
    recognized = asr(audio)
    return recognized["text"]
|
71 |
+
def speech_to_text(speech):
    """Return the ``"text"`` entry produced by the ``asr`` pipeline for *speech*.

    Same behavior as ``transcribe``; this is the name the "Recognize Speech"
    button is wired to.
    """
    return asr(speech)["text"]
|
74 |
+
|
75 |
+
##Summarization
|
76 |
+
# Summarization pipeline built once at import; used by summarize_text().
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
|
77 |
+
def summarize_text(text):
    """Summarize *text* and return the ``'summary_text'`` of the first result."""
    first_result = summarizer(text)[0]
    return first_result['summary_text']
|
81 |
+
|
82 |
+
##Fiscal Tone Analysis
|
83 |
+
# Financial-tone classifier built once at import; used by text_to_sentiment()
# (whole text) and fin_ext() (sentence by sentence).
fin_model= pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
|
84 |
+
def text_to_sentiment(text):
    """Return the label of the first ``fin_model`` prediction for *text*."""
    top_prediction = fin_model(text)[0]
    return top_prediction["label"]
|
87 |
+
|
88 |
+
##Company Extraction
|
89 |
+
def fin_ner(text):
    """Run the hosted ``dslim/bert-base-NER`` model on *text*.

    Args:
        text: Text to send to the remote model.

    Returns:
        Whatever the loaded Gradio interface returns for *text*; the UI feeds
        it to a ``gr.HighlightedText`` component.

    The remote interface is built on the first call and cached on the
    function object, so later clicks reuse it instead of reloading it.
    """
    api = getattr(fin_ner, "_api", None)
    if api is None:
        # `gr.load` replaces the removed `gr.Interface.load` classmethod.
        # NOTE(review): `hf_token` is the Gradio 4 keyword and is still accepted
        # (deprecated in favour of `token`) in Gradio 5 — confirm against the
        # pinned gradio version.
        api = fin_ner._api = gr.load("dslim/bert-base-NER", src="models", hf_token=auth_token)
    return api(text)
|
93 |
+
|
94 |
+
##Fiscal Sentiment by Sentence
|
95 |
+
def fin_ext(text):
    """Classify the financial tone of every sentence in *text*.

    Returns:
        The ``(sentence, label)`` pairs built by ``make_spans`` from the
        ``fin_model`` predictions.
    """
    sentences = split_in_sentences(text)
    return make_spans(text, fin_model(sentences))
|
98 |
+
|
99 |
+
##Forward Looking Statement
|
100 |
+
def fls(text):
    """Label each sentence of *text* with the forward-looking-statement classifier.

    Args:
        text: Text to analyze; it is split with ``split_in_sentences``.

    Returns:
        The ``(sentence, label)`` pairs built by ``make_spans``.

    The classifier is built on the first call and cached on the function
    object, so later clicks do not reload the model.
    """
    fls_model = getattr(fls, "_model", None)
    if fls_model is None:
        # Imported locally under an alias: further down this module the name
        # `pipeline` is rebound to the joblib-loaded churn pipeline, so when a
        # click handler runs it no longer refers to the model factory.
        # NOTE(review): assumes the module-level `pipeline` used by the other
        # models is `transformers.pipeline` — confirm against the import block.
        from transformers import pipeline as hf_pipeline

        # Public alternative checkpoint: "yiyanghkust/finbert-fls".
        fls_model = fls._model = hf_pipeline(
            "text-classification",
            model="demo-org/finbert_fls",
            tokenizer="demo-org/finbert_fls",
            use_auth_token=auth_token,
        )
    results = fls_model(split_in_sentences(text))
    return make_spans(text, results)
|
105 |
+
|
106 |
|
107 |
# 4. Personal Info Detection
|
108 |
def detect_pii(text):
|
|
|
119 |
return found or "No personal information found."
|
120 |
|
121 |
# 5. Telco Customer Churn Prediction
|
122 |
+
# Resolve the serialized artifacts in the `toolkit` folder next to this script.
script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path, model_path = (
    os.path.join(script_dir, 'toolkit', artifact)
    for artifact in ('pipeline.joblib', 'Random Forest Classifier.joblib')
)

# Load the transformation pipeline and the classifier used by predict().
# NOTE(review): this rebinds the module-level name `pipeline`, which earlier
# statements in this file call as a model factory — confirm nothing calls it
# that way after this point.
pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)
|
129 |
+
|
130 |
+
# Create a function to calculate TotalCharges
|
131 |
+
def calculate_total_charges(tenure, monthly_charges):
    """Return the product ``tenure * monthly_charges``.

    predict() uses this value as the ``TotalCharges`` feature.
    """
    total_charges = tenure * monthly_charges
    return total_charges
|
133 |
+
|
134 |
+
# Create a function that applies the ML pipeline and makes predictions
|
135 |
+
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Apply the saved preprocessing pipeline and model to one customer record.

    The fifteen arguments are the raw form values, in the order the UI passes
    them. ``TotalCharges`` is derived via ``calculate_total_charges``.

    Returns:
        A dict mapping the two display labels (churn / stay) to the
        corresponding entries of ``model.predict_proba`` for the record.
    """
    # Column order below is the order the one-row frame is built in.
    record = {
        'SeniorCitizen': SeniorCitizen,
        'Partner': Partner,
        'Dependents': Dependents,
        'tenure': tenure,
        'InternetService': InternetService,
        'OnlineSecurity': OnlineSecurity,
        'OnlineBackup': OnlineBackup,
        'DeviceProtection': DeviceProtection,
        'TechSupport': TechSupport,
        'StreamingTV': StreamingTV,
        'StreamingMovies': StreamingMovies,
        'Contract': Contract,
        'PaperlessBilling': PaperlessBilling,
        'PaymentMethod': PaymentMethod,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': calculate_total_charges(tenure, MonthlyCharges),
    }
    input_df = pd.DataFrame({name: [value] for name, value in record.items()})

    # Split column names by dtype: object columns are treated as categorical.
    cat_cols, num_cols = [], []
    for col in input_df.columns:
        bucket = cat_cols if input_df[col].dtype == 'object' else num_cols
        bucket.append(col)

    X_processed = pipeline.transform(input_df)

    # Rebuild column names: numeric names first, then the one-hot names
    # reported by the fitted encoder for the categorical columns.
    onehot = (
        pipeline.named_steps['preprocessor']
        .named_transformers_['cat']
        .named_steps['onehot']
    )
    feature_names = num_cols + list(onehot.get_feature_names_out(cat_cols))
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end before predicting.
    # NOTE(review): presumably this matches the column order the model was
    # trained on — confirm against the training code.
    final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)

    stay_prob, churn_prob = model.predict_proba(final_df)[0][:2]
    return {
        "Prediction: CHURN 🔴": churn_prob,
        "Prediction: STAY ✅": stay_prob,
    }
|
192 |
+
|
193 |
+
# Collects the churn form components; the Telco tab extends it with both input
# columns and passes it as `inputs` to the Predict button.
input_interface = []
|
194 |
|
195 |
# Gradio UI setup
|
196 |
with gr.Blocks() as demo:
|
197 |
# Translator tab (declared inside the `with gr.Blocks() as demo:` context).
with gr.Tab("Translator"):
    gr.HTML("<h1 align = 'center'> Text Translator </h1>")
    gr.HTML("<h4 align = 'center'> Translate to any language </h4>")

    # Two text inputs (source text, target language) feed text_translator().
    source_box = gr.Textbox(label = "Enter the text that you want to translate")
    language_box = gr.Textbox(
        label = "Enter the language that you want it to translate to",
        placeholder = "Example : Hindi,French,Bengali,etc",
    )
    inputs = [source_box, language_box]
    generate_btn = gr.Button(value = 'Generate')
    outputs = [gr.Textbox(label = "Translated text")]
    generate_btn.click(fn = text_translator, inputs= inputs, outputs = outputs)
|
205 |
|
206 |
# Sentiment tab (declared inside the `with gr.Blocks() as demo:` context).
with gr.Tab("Sentiment Analysis"):
    gr.Markdown("""
    # Análisis de Sentimientos
    Esta aplicación utiliza un modelo de Machine Learning para analizar el sentimiento de los mensajes ingresados.
    Puede detectar si un texto es positivo, negativo o neutral con su respectiva probabilidad.
    """)

    # Bound to its own name: `with` blocks do not create a scope, so assigning
    # this widget to `chat` would overwrite the module-level `chat` object that
    # text_translator() calls to reach the language model.
    sentiment_chat = gr.ChatInterface(sentiment_analysis, type="messages")

    gr.Markdown("""
    ---
    ### Conéctate conmigo:
    [Instagram 📸](https://www.instagram.com/srjosueaaron/)

    [TikTok 🎵](https://www.tiktok.com/@srjosueaaron)

    [YouTube 🎬](https://www.youtube.com/@srjosueaaron)
    ---
    Demostración de Análisis de Sentimientos usando el modelo de [CardiffNLP](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment).

    Desarrollado con ❤️ por [@srjosueaaron](https://www.instagram.com/srjosueaaron/).
    """)
|
228 |
|
229 |
# Financial Analyst tab (declared inside the `with gr.Blocks() as demo:` context).
# NOTE(review): Row/Column nesting is reconstructed from statement order —
# confirm the layout against the running app.
with gr.Tab("Financial Analyst"):
    gr.Markdown("## Financial Analyst AI")
    gr.Markdown("This project applies AI trained by our financial analysts to analyze earning calls and other financial documents.")
    with gr.Row():
        # Left column: speech -> text -> summary -> overall tone.
        with gr.Column():
            # `gr.inputs.Audio(source=...)` belongs to the removed `gr.inputs`
            # namespace; the component is `gr.Audio` and takes a `sources` list.
            audio_file = gr.Audio(sources=["microphone"], type="filepath")
            with gr.Row():
                b1 = gr.Button("Recognize Speech")
            with gr.Row():
                text = gr.Textbox(value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month. The department expects inflation to continue to rise.")
                b1.click(speech_to_text, inputs=audio_file, outputs=text)
            with gr.Row():
                b2 = gr.Button("Summarize Text")
                stext = gr.Textbox()
                b2.click(summarize_text, inputs=text, outputs=stext)
            with gr.Row():
                b3 = gr.Button("Classify Financial Tone")
                label = gr.Label()
                # Tone is classified on the summary, not on the full text.
                b3.click(text_to_sentiment, inputs=stext, outputs=label)
        # Right column: per-sentence analyses and entity highlighting.
        with gr.Column():
            b5 = gr.Button("Financial Tone and Forward Looking Statement Analysis")
            with gr.Row():
                fin_spans = gr.HighlightedText()
                b5.click(fin_ext, inputs=text, outputs=fin_spans)
            with gr.Row():
                fls_spans = gr.HighlightedText()
                # Second handler on the same button: one click fills both views.
                b5.click(fls, inputs=text, outputs=fls_spans)
            with gr.Row():
                b4 = gr.Button("Identify Companies & Locations")
                replaced_spans = gr.HighlightedText()
                b4.click(fin_ner, inputs=text, outputs=replaced_spans)
|
260 |
|
261 |
with gr.Tab("PII Detector"):
|
262 |
pii_input = gr.Textbox(label="Text")
|
|
|
265 |
pii_button.click(fn=detect_pii, inputs=pii_input, outputs=pii_output)
|
266 |
|
267 |
# Telco churn tab (declared inside the `with gr.Blocks() as demo:` context).
# NOTE(review): Row/Column nesting is reconstructed from statement order —
# confirm the layout against the running app.
with gr.Tab("Telco Churn Predictor"):
    with gr.Row():
        # Created inside the row: a component is placed where it is
        # instantiated, so a bare `Title` expression in a later row does nothing.
        Title = gr.Label('Customer Churn Prediction App')

    with gr.Row():
        gr.Markdown("This app predicts likelihood of a customer to leave or stay with the company")

    # Component order across both columns must match predict()'s parameter order.
    with gr.Row():
        with gr.Column():
            input_interface_column_1 = [
                gr.components.Radio(['Yes', 'No'], label="Are you a Seniorcitizen?"),
                gr.components.Radio(['Yes', 'No'], label='Do you have Partner?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have any Dependents?'),
                gr.components.Slider(label='Enter length of Tenure in Months', minimum=1, maximum=73, step=1),
                # NOTE(review): the help text below lists this field's values as
                # (DSL, Fiber Optic, No) — confirm 'No Internet' is a category
                # the fitted encoder knows.
                gr.components.Radio(['DSL', 'Fiber optic', 'No Internet'], label='What is your Internet Service?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have Online Security?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have Online Backup?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have Device Protection?'),
            ]

        with gr.Column():
            input_interface_column_2 = [
                gr.components.Radio(['No', 'Yes'], label='Do you have Tech Support?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have Streaming TV?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have Streaming Movies?'),
                gr.components.Radio(['Month-to-month', 'One year', 'Two year'], label='What is your Contract Type?'),
                gr.components.Radio(['Yes', 'No'], label='Do you prefer Paperless Billing?'),
                gr.components.Radio(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], label='Which PaymentMethod do you prefer?'),
                gr.components.Slider(label="Enter monthly charges", minimum=18.40, maximum=118.65),
            ]

    # Plain list bookkeeping; it creates no components, so it needs no Row.
    input_interface.extend(input_interface_column_1)
    input_interface.extend(input_interface_column_2)

    with gr.Row():
        predict_btn = gr.Button('Predict')
        output_interface = gr.Label(label="churn")

    with gr.Accordion("Open for information on inputs", open=False):
        gr.Markdown("""This app receives the following as inputs and processes them to return the prediction on whether a customer, will churn or not.

        - SeniorCitizen: Whether a customer is a senior citizen or not
        - Partner: Whether the customer has a partner or not (Yes, No)
        - Dependents: Whether the customer has dependents or not (Yes, No)
        - Tenure: Number of months the customer has stayed with the company
        - InternetService: Customer's internet service provider (DSL, Fiber Optic, No)
        - OnlineSecurity: Whether the customer has online security or not (Yes, No, No Internet)
        - OnlineBackup: Whether the customer has online backup or not (Yes, No, No Internet)
        - DeviceProtection: Whether the customer has device protection or not (Yes, No, No internet service)
        - TechSupport: Whether the customer has tech support or not (Yes, No, No internet)
        - StreamingTV: Whether the customer has streaming TV or not (Yes, No, No internet service)
        - StreamingMovies: Whether the customer has streaming movies or not (Yes, No, No Internet service)
        - Contract: The contract term of the customer (Month-to-Month, One year, Two year)
        - PaperlessBilling: Whether the customer has paperless billing or not (Yes, No)
        - Payment Method: The customer's payment method (Electronic check, mailed check, Bank transfer(automatic), Credit card(automatic))
        - MonthlyCharges: The amount charged to the customer monthly
        """)

    predict_btn.click(fn=predict, inputs=input_interface, outputs=output_interface)
|
329 |
|
330 |
# Start serving the Blocks app assembled above.
demo.launch()
|