jayebaku committed on
Commit
736168d
·
verified ·
1 Parent(s): c78b1ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -37
app.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
5
 
6
  from classifier import classify
7
  from statistics import mean
8
- from genra_incremental import GenraPipeline
9
  from qa_summary import generate_answer
10
 
11
 
@@ -88,51 +88,51 @@ def add_query(to_add, history):
88
  history.append(to_add)
89
  return gr.CheckboxGroup(choices=history), history
90
 
91
- def qa_process(selected_queries, qa_llm_model, aggregator,
92
- batch_size, topk, text_field, data_df):
93
 
94
- emb_model = 'multi-qa-mpnet-base-dot-v1'
95
- contexts = []
96
 
97
- queries_df = pd.DataFrame({'id':[j for j in range(len(selected_queries))],'query': selected_queries})
98
- qa_input_df = data_df[data_df["model_label"] != "none"].reset_index()
99
 
100
- tweets_df = qa_input_df[[text_field]]
101
- tweets_df.reset_index(inplace=True)
102
- tweets_df.rename(columns={"index": "order", text_field: "text"},inplace=True)
103
 
104
- gr.Info("Loading GENRA pipeline....")
105
- genra = GenraPipeline(qa_llm_model, emb_model, aggregator, contexts)
106
- gr.Info("Waiting for data...")
107
- batches = [tweets_df[i:i+batch_size] for i in range(0,len(tweets_df),batch_size)]
108
 
109
- genra_answers = []
110
- summarize_batch = True
111
- for batch_number, tweets in enumerate(batches):
112
- gr.Info(f"Populating index for batch {batch_number}")
113
- genra.qa_indexer.index_dataframe(tweets)
114
- gr.Info(f"Performing retrieval for batch {batch_number}")
115
- genra.retrieval(batch_number, queries_df, topk, summarize_batch)
116
 
117
- gr.Info("Processed all batches!")
118
 
119
- gr.Info("Getting summary...")
120
- summary = genra.summarize_history(queries_df)
121
 
122
- gr.Info("Preparing results...")
123
- results = genra.answers_store
124
- final_answers, q_a = [], []
125
- for q, g_answers in results.items():
126
- for answer in g_answers:
127
- final_answers.append({'question':q, "tweets":answer['tweets'], "batch":answer['batch_number'], "summary":answer['summary'] })
128
- for t in answer['tweets']:
129
- q_a.append((q,t))
130
- answers_df = pd.DataFrame.from_dict(final_answers)
131
- q_a = list(set(q_a))
132
- q_a_df = pd.DataFrame(q_a, columns =['question', 'tweet'])
133
- q_a_df = q_a_df.sort_values(by=["question"], ascending=False)
134
 
135
- return q_a_df, answers_df, summary
136
 
137
  def qa_summarise(selected_queries, qa_llm_model, text_field, data_df):
138
 
 
5
 
6
  from classifier import classify
7
  from statistics import mean
8
+ # from genra_incremental import GenraPipeline
9
  from qa_summary import generate_answer
10
 
11
 
 
88
  history.append(to_add)
89
  return gr.CheckboxGroup(choices=history), history
90
 
91
+ # def qa_process(selected_queries, qa_llm_model, aggregator,
92
+ # batch_size, topk, text_field, data_df):
93
 
94
+ # emb_model = 'multi-qa-mpnet-base-dot-v1'
95
+ # contexts = []
96
 
97
+ # queries_df = pd.DataFrame({'id':[j for j in range(len(selected_queries))],'query': selected_queries})
98
+ # qa_input_df = data_df[data_df["model_label"] != "none"].reset_index()
99
 
100
+ # tweets_df = qa_input_df[[text_field]]
101
+ # tweets_df.reset_index(inplace=True)
102
+ # tweets_df.rename(columns={"index": "order", text_field: "text"},inplace=True)
103
 
104
+ # gr.Info("Loading GENRA pipeline....")
105
+ # genra = GenraPipeline(qa_llm_model, emb_model, aggregator, contexts)
106
+ # gr.Info("Waiting for data...")
107
+ # batches = [tweets_df[i:i+batch_size] for i in range(0,len(tweets_df),batch_size)]
108
 
109
+ # genra_answers = []
110
+ # summarize_batch = True
111
+ # for batch_number, tweets in enumerate(batches):
112
+ # gr.Info(f"Populating index for batch {batch_number}")
113
+ # genra.qa_indexer.index_dataframe(tweets)
114
+ # gr.Info(f"Performing retrieval for batch {batch_number}")
115
+ # genra.retrieval(batch_number, queries_df, topk, summarize_batch)
116
 
117
+ # gr.Info("Processed all batches!")
118
 
119
+ # gr.Info("Getting summary...")
120
+ # summary = genra.summarize_history(queries_df)
121
 
122
+ # gr.Info("Preparing results...")
123
+ # results = genra.answers_store
124
+ # final_answers, q_a = [], []
125
+ # for q, g_answers in results.items():
126
+ # for answer in g_answers:
127
+ # final_answers.append({'question':q, "tweets":answer['tweets'], "batch":answer['batch_number'], "summary":answer['summary'] })
128
+ # for t in answer['tweets']:
129
+ # q_a.append((q,t))
130
+ # answers_df = pd.DataFrame.from_dict(final_answers)
131
+ # q_a = list(set(q_a))
132
+ # q_a_df = pd.DataFrame(q_a, columns =['question', 'tweet'])
133
+ # q_a_df = q_a_df.sort_values(by=["question"], ascending=False)
134
 
135
+ # return q_a_df, answers_df, summary
136
 
137
  def qa_summarise(selected_queries, qa_llm_model, text_field, data_df):
138