Towhidul committed on
Commit
718d231
·
1 Parent(s): 8969d3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +370 -169
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import re
4
-
5
  import requests
6
 
7
  API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
@@ -81,6 +81,7 @@ if evidence_text:
81
 
82
 
83
  import pandas as pd
 
84
  import numpy as np
85
  from allennlp.predictors.predictor import Predictor
86
  import allennlp_models.tagging
@@ -89,11 +90,14 @@ predictor = Predictor.from_path("structured-prediction-srl-bert.tar.gz")
89
  #---------------------------------------------------------------
90
  def claim(text):
91
  import re
92
- df = pd.DataFrame({'claim' : [text]})
93
- pattern = r'(\d+),(\d+)'
94
- def remove_number_commas(match):
95
- return match.group(1) + match.group(2)
96
- df['claim'] = df['claim'].apply(lambda x: re.sub(pattern, remove_number_commas, x))
 
 
 
97
  def srl_allennlp(sent):
98
  try:
99
  #result = predictor.predict(sentence=sent)['verbs'][0]['description']
@@ -160,11 +164,16 @@ def claim(text):
160
  else:
161
  substr = substr + val_string[i]
162
  else:
163
- substr = None
164
- who.append(substr)
165
-
166
- df['who'][j] = who
167
-
 
 
 
 
 
168
  #----------FOR COLUMN "WHAT"------------#
169
  df['what'] = ''
170
  for j in range(len(df['modified'])):
@@ -187,10 +196,15 @@ def claim(text):
187
  else:
188
  substr = substr + val_string[i]
189
  else:
190
- substr = None
191
- what.append(substr)
192
-
193
- df['what'][j] = what
 
 
 
 
 
194
 
195
  #----------FOR COLUMN "WHY"------------#
196
  df['why'] = ''
@@ -214,10 +228,15 @@ def claim(text):
214
  else:
215
  substr = substr + val_string[i]
216
  else:
217
- substr = None
218
- why.append(substr)
 
 
 
219
 
220
- df['why'][j] = why
 
 
221
 
222
  #----------FOR COLUMN "WHEN"------------#
223
  df['when'] = ''
@@ -241,10 +260,15 @@ def claim(text):
241
  else:
242
  substr = substr + val_string[i]
243
  else:
244
- substr = None
245
- when.append(substr)
 
 
 
246
 
247
- df['when'][j] = when
 
 
248
 
249
 
250
  #----------FOR COLUMN "WHERE"------------#
@@ -269,163 +293,337 @@ def claim(text):
269
  else:
270
  substr = substr + val_string[i]
271
  else:
272
- substr = None
273
- where.append(substr)
 
 
 
 
 
274
 
275
- df['where'][j] = where
276
 
277
  data=df[["claim","who","what","why","when","where"]].copy()
278
- import re
279
- def remove_trail_comma(text):
280
- x = re.sub(",\s*$", "", text)
281
- return x
282
-
283
-
284
- data['claim']=data['claim'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
285
- data['claim']=data['claim'].apply(lambda x: str(x).replace('[','').replace(']',''))
286
-
287
-
288
-
289
- data['who']=data['who'].apply(lambda x: str(x).replace(" 's","'s"))
290
- data['who']=data['who'].apply(lambda x: str(x).replace("s ’","s’"))
291
- data['who']=data['who'].apply(lambda x: str(x).replace(" - ","-"))
292
- data['who']=data['who'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
293
- # data['who']=data['who'].apply(lambda x: str(x).replace('"','').replace('"',''))
294
- data['who']=data['who'].apply(lambda x: str(x).replace('[','').replace(']',''))
295
- data['who']=data['who'].apply(lambda x: str(x).rstrip(','))
296
- data['who']=data['who'].apply(lambda x: str(x).lstrip(','))
297
- data['who']=data['who'].apply(lambda x: str(x).replace('None,','').replace('None',''))
298
- data['who']=data['who'].apply(remove_trail_comma)
299
-
300
-
301
-
302
- data['what']=data['what'].apply(lambda x: str(x).replace(" 's","'s"))
303
- data['what']=data['what'].apply(lambda x: str(x).replace("s ’","s’"))
304
- data['what']=data['what'].apply(lambda x: str(x).replace(" - ","-"))
305
- data['what']=data['what'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
306
- # data['what']=data['what'].apply(lambda x: str(x).replace('"','').replace('"',''))
307
- data['what']=data['what'].apply(lambda x: str(x).replace('[','').replace(']',''))
308
- data['what']=data['what'].apply(lambda x: str(x).rstrip(','))
309
- data['what']=data['what'].apply(lambda x: str(x).lstrip(','))
310
- data['what']=data['what'].apply(lambda x: str(x).replace('None,','').replace('None',''))
311
- data['what']=data['what'].apply(remove_trail_comma)
312
-
313
- data['why']=data['why'].apply(lambda x: str(x).replace(" 's","'s"))
314
- data['why']=data['why'].apply(lambda x: str(x).replace("s ’","s’"))
315
- data['why']=data['why'].apply(lambda x: str(x).replace(" - ","-"))
316
- data['why']=data['why'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
317
- # data['why']=data['why'].apply(lambda x: str(x).replace('"','').replace('"',''))
318
- data['why']=data['why'].apply(lambda x: str(x).replace('[','').replace(']',''))
319
- data['why']=data['why'].apply(lambda x: str(x).rstrip(','))
320
- data['why']=data['why'].apply(lambda x: str(x).lstrip(','))
321
- data['why']=data['why'].apply(lambda x: str(x).replace('None,','').replace('None',''))
322
- data['why']=data['why'].apply(remove_trail_comma)
323
-
324
- data['when']=data['when'].apply(lambda x: str(x).replace(" 's","'s"))
325
- data['when']=data['when'].apply(lambda x: str(x).replace("s ’","s’"))
326
- data['when']=data['when'].apply(lambda x: str(x).replace(" - ","-"))
327
- data['when']=data['when'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
328
- # data['when']=data['when'].apply(lambda x: str(x).replace('"','').replace('"',''))
329
- data['when']=data['when'].apply(lambda x: str(x).replace('[','').replace(']',''))
330
- data['when']=data['when'].apply(lambda x: str(x).rstrip(','))
331
- data['when']=data['when'].apply(lambda x: str(x).lstrip(','))
332
- data['when']=data['when'].apply(lambda x: str(x).replace('None,','').replace('None',''))
333
- data['when']=data['when'].apply(remove_trail_comma)
334
-
335
- data['where']=data['where'].apply(lambda x: str(x).replace(" 's","'s"))
336
- data['where']=data['where'].apply(lambda x: str(x).replace("s ’","s’"))
337
- data['where']=data['where'].apply(lambda x: str(x).replace(" - ","-"))
338
- data['where']=data['where'].apply(lambda x: str(x).replace('\'','').replace('\'',''))
339
- # data['where']=data['where'].apply(lambda x: str(x).replace('"','').replace('"',''))
340
- data['where']=data['where'].apply(lambda x: str(x).replace('[','').replace(']',''))
341
- data['where']=data['where'].apply(lambda x: str(x).rstrip(','))
342
- data['where']=data['where'].apply(lambda x: str(x).lstrip(','))
343
- data['where']=data['where'].apply(lambda x: str(x).replace('None,','').replace('None',''))
344
- data['where']=data['where'].apply(remove_trail_comma)
345
  return data
346
  #-------------------------------------------------------------------------
347
- def split_ws(input_list):
348
- import re
349
  output_list = []
350
  for item in input_list:
351
- split_item = re.findall(r'[^",]+|"[^"]*"', item)
352
- output_list += split_item
353
- result = [x.strip() for x in output_list]
354
- return result
 
 
355
 
356
  #--------------------------------------------------------------------------
357
- def gen_qq(df):
358
- w_list=["who","when","where","what","why"]
359
- ans=[]
360
- cl=[]
361
- ind=[]
362
- ques=[]
363
- evid=[]
364
- for index,value in enumerate(w_list):
365
- for i,row in df.iterrows():
366
- srl=df[value][i]
367
- claim=df['claim'][i]
368
- evidence_text=df['evidence'][i]
369
- answer= split_ws(df[value])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  try:
371
- if len(srl.split())>0 and len(srl.split(","))>0:
372
- for j in range(0,len(answer)):
373
- FACT_TO_GENERATE_QUESTION_FROM = f"""{answer[j]} [SEP] {claim}"""
374
- question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
375
- "num_beams":5,
376
- "early_stopping":True})
377
- #print("claim : {}".format(claim))
378
- #print("answer : {}".format(answer[j]))
379
- #print("question : {}".format(question_ids[0]['generated_text']))
380
- ind.append(i)
381
- cl.append(claim)
382
- ans.append(answer[j])
383
- ques.append(question_ids[0]['generated_text'].capitalize())
384
- evid.append(evidence_text)
385
- #print("-----------------------------------------")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  except:
387
  pass
388
- return cl,ques,ans,evid
 
 
 
 
 
389
  #------------------------------------------------------------
390
- def qa_evidence(final_data):
391
- ans=[]
392
- cl=[]
393
- #ind=[]
394
- ques=[]
395
- evi=[]
396
- srl_ans=[]
397
-
398
-
399
- for i,row in final_data.iterrows():
400
- question=final_data['gen_question'][i]
401
- evidence=final_data['evidence'][i]
402
- claim=final_data['actual_claim'][i]
403
- srl_answer=final_data['actual_answer'][i]
404
- #index=df["index"][i]
405
-
406
- input_evidence = f"question: {question} context: {evidence}"
407
-
408
- answer = query_evidence({
409
- "inputs":input_evidence,
410
- "truncation":True})
411
-
412
- #ind.append(index)
413
- cl.append(claim)
414
- ans.append(answer[0]["generated_text"])
415
- ques.append(question)
416
- evi.append(evidence)
417
- srl_ans.append(srl_answer)
418
-
419
- #print(f"""index: {index}""")
420
- # print(f"""evidence: {evidence}""")
421
- # print(f"""claim: {claim}""")
422
- # print(f"""Question: {question}""")
423
- # print(f"""Answer: {answer}""")
424
- # print(f"""SRL Answer: {srl_answer}""")
425
- # print("------------------------------------")
426
- # return list(zip(cl,ques,srl_ans)),list(zip(evi,ques,ans))
427
- # return cl,ques
428
- return list(zip(ques,srl_ans)),list(zip(ques,ans))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
  #------------------------------------------------------------
431
 
@@ -433,10 +631,13 @@ if claim_text:
433
  if evidence_text:
434
  df=claim(claim_text)
435
  df["evidence"]=evidence_text
436
- actual_claim,gen_question,actual_answer,evidence=gen_qq(df)
437
- final_data=pd.DataFrame([actual_claim,gen_question,actual_answer,evidence]).T
438
- final_data.columns=["actual_claim","gen_question","actual_answer","evidence"]
439
- st.dataframe(final_data)
 
 
 
440
  # a,b=qa_evidence(final_data)
441
  # qa_evidence(final_data)
442
  # st.json(qa_evidence(final_data))
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import re
4
+ import time
5
  import requests
6
 
7
  API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
 
81
 
82
 
83
  import pandas as pd
84
+ from rouge_score import rouge_scorer
85
  import numpy as np
86
  from allennlp.predictors.predictor import Predictor
87
  import allennlp_models.tagging
 
90
  #---------------------------------------------------------------
91
  def claim(text):
92
  import re
93
+ def remove_special_chars(text):
94
+ # Remove special characters that are not in between numbers
95
+ text = re.sub(r'(?<!\d)[^\w\s]+(?!\d)', '', text)
96
+
97
+ return text
98
+
99
+ df = pd.DataFrame({'claim' : remove_special_chars(text)},index=[0])
100
+
101
  def srl_allennlp(sent):
102
  try:
103
  #result = predictor.predict(sentence=sent)['verbs'][0]['description']
 
164
  else:
165
  substr = substr + val_string[i]
166
  else:
167
+ pass
168
+ if len(substr)!= 0:
169
+ who.append(substr)
170
+ else:
171
+ pass
172
+
173
+
174
+ df['who'][j] = "<sep>".join(who)
175
+ # else:
176
+ # continue
177
  #----------FOR COLUMN "WHAT"------------#
178
  df['what'] = ''
179
  for j in range(len(df['modified'])):
 
196
  else:
197
  substr = substr + val_string[i]
198
  else:
199
+ pass
200
+ if len(substr)!= 0:
201
+ what.append(substr)
202
+ else:
203
+ pass
204
+
205
+ df['what'][j] = "<sep>".join(what)
206
+ # else:
207
+ # continue
208
 
209
  #----------FOR COLUMN "WHY"------------#
210
  df['why'] = ''
 
228
  else:
229
  substr = substr + val_string[i]
230
  else:
231
+ pass
232
+ if len(substr)!= 0:
233
+ why.append(substr)
234
+ else:
235
+ pass
236
 
237
+ df['why'][j] = "<sep>".join(why)
238
+ # else:
239
+ # continue
240
 
241
  #----------FOR COLUMN "WHEN"------------#
242
  df['when'] = ''
 
260
  else:
261
  substr = substr + val_string[i]
262
  else:
263
+ pass
264
+ if len(substr)!= 0:
265
+ when.append(substr)
266
+ else:
267
+ pass
268
 
269
+ df['when'][j] = "<sep>".join(when)
270
+ # else:
271
+ # continue
272
 
273
 
274
  #----------FOR COLUMN "WHERE"------------#
 
293
  else:
294
  substr = substr + val_string[i]
295
  else:
296
+ pass
297
+ if len(substr)!= 0:
298
+ where.append(substr)
299
+ else:
300
+ pass
301
+
302
+ df['where'][j] = "<sep>".join(where)
303
 
 
304
 
305
  data=df[["claim","who","what","why","when","where"]].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  return data
307
  #-------------------------------------------------------------------------
308
def split_ws(input_list, delimiter="<sep>"):
    """Flatten delimiter-joined strings into a list of clean, non-empty parts.

    Args:
        input_list: Iterable of strings (e.g. a DataFrame column), each
            holding zero or more values joined by ``delimiter``. A bare
            string is treated as a single item rather than being iterated
            character by character. ``None`` yields an empty list.
        delimiter: Separator placed between values inside each item.

    Returns:
        A flat list of the whitespace-stripped, non-empty parts, in input
        order. Items that are not strings (e.g. ``None`` / NaN cells) are
        skipped.
    """
    if input_list is None:
        return []
    if isinstance(input_list, str):
        # Iterating a str would yield single characters, never what a caller wants.
        input_list = [input_list]

    output_list = []
    for item in input_list:
        if not isinstance(item, str):
            # Missing cells have nothing to split.
            continue
        for sub_item in item.split(delimiter):
            sub_item = sub_item.strip()
            if sub_item:
                output_list.append(sub_item)
    return output_list
317
 
318
  #--------------------------------------------------------------------------
319
def calc_rouge_l_score(list_of_evidence, list_of_ans):
    """Return the ROUGE-L F-measure between an evidence text and an answer.

    Args:
        list_of_evidence: The evidence text, either as one string or as an
            iterable of string fragments that are joined with spaces.
        list_of_ans: The answer text, in the same accepted forms.

    Returns:
        The ``fmeasure`` of the ``rougeL`` score (a float).
    """
    def _as_text(value):
        # A plain string must be scored as-is: ' '.join("abc") would give
        # "a b c" and make the comparison character-level, not word-level.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        return ' '.join(str(part) for part in value)

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    scores = scorer.score(_as_text(list_of_evidence), _as_text(list_of_ans))
    return scores['rougeL'].fmeasure
323
+ #-------------------------------------------------------------------------
324
+
325
+
326
def rephrase_question_who(question):
    """Force a generated question to open with "Who".

    Args:
        question: The generated question text.

    Returns:
        ``question`` unchanged when it already starts with "who"
        (case-insensitive) or contains no words; otherwise the question with
        its first word replaced by "Who" and words re-joined by single spaces.
    """
    if question.lower().startswith("who"):
        return question
    words = question.split()
    if not words:
        # Empty / whitespace-only text has no first word to replace.
        return question
    words[0] = "Who"
    return " ".join(words)
333
+ #------------------------------------------------------------------------
334
def gen_qa_who(df):
    """Generate "who" questions for each claim and verify them against evidence.

    For every row, the ``who`` cell is split on ``<sep>`` into candidate
    answers. For each candidate a question is requested through ``query``,
    forced to start with "Who", and then answered from the row's ``evidence``
    text through ``query_evidence``. The evidence answer is compared with the
    candidate via ``calc_rouge_l_score`` and mapped to a verification label.

    Args:
        df: DataFrame with ``claim``, ``who`` and ``evidence`` columns.

    Returns:
        A 4-tuple ``(questions, answers, statuses, evidence_answers)``. Each
        element is a list of strings with one entry per candidate that was
        processed successfully. If a row has an empty ``who`` cell, the four
        placeholder strings are returned instead.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    threshold = 0.2  # minimum ROUGE-L F-measure for 'Verified Valid'
    no_mention = "No mention of 'who'in any related documents."

    list_of_ques_who = []
    list_of_ans_who = []
    list_of_evidence_answer_who = []
    rouge_l_scores = []

    for _, row in df.iterrows():
        srl = row["who"]
        if srl == "":
            # Previously the four results were rebound to these strings, after
            # which no later row could append to them; returning right away
            # gives the same values without further API calls.
            return "No claims", "", "Not verifiable", no_mention

        claim = row["claim"]
        evidence = row["evidence"]
        # Split only this row's cell so answers from other rows never leak in.
        answers = split_ws([srl])

        for j, candidate in enumerate(answers, start=1):
            try:
                FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{candidate} [SEP] context:{claim}"""
                # NOTE(review): generation options are sent as top-level keys;
                # confirm query() forwards them the way the endpoint expects.
                question = query({"inputs": FACT_TO_GENERATE_QUESTION_FROM,
                                  "num_beams": 5,
                                  "early_stopping": True,
                                  "min_length": 100, "wait_for_model": True})[0]['generated_text'].capitalize()
                question = rephrase_question_who(question)

                input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
                answer_evidence = query_evidence({"inputs": input_evidence, "truncation": True, "wait_for_model": True})[0]['generated_text']
                if answer_evidence.lower() in evidence.lower():
                    evidence_entry = f"""Evidence{j}:{answer_evidence}"""
                else:
                    # An answer not present in the evidence is treated as absent.
                    answer_evidence = ""
                    evidence_entry = no_mention

                rouge_l_score = calc_rouge_l_score(answer_evidence, candidate)
            except Exception:
                # Best-effort: one failed request must not drop the remaining
                # candidates, but it should be visible in the logs.
                log.warning("gen_qa_who: skipping answer %r", candidate, exc_info=True)
                continue

            if rouge_l_score >= threshold:
                verification_status = 'Verified Valid'
            elif rouge_l_score == 0:
                verification_status = 'Not verifiable'
            else:
                verification_status = 'Verified False'

            # Append all four together so the result lists stay aligned.
            list_of_ques_who.append(f"""Q{j}:{question}""")
            list_of_ans_who.append(f"""Ans{j}:{candidate}""")
            list_of_evidence_answer_who.append(evidence_entry)
            rouge_l_scores.append(verification_status)

    return list_of_ques_who, list_of_ans_who, rouge_l_scores, list_of_evidence_answer_who
384
  #------------------------------------------------------------
385
+
386
def rephrase_question_what(question):
    """Force a generated question to open with "What".

    Args:
        question: The generated question text.

    Returns:
        ``question`` unchanged when it already starts with "what"
        (case-insensitive) or contains no words; otherwise the question with
        its first word replaced by "What" and words re-joined by single spaces.
    """
    if question.lower().startswith("what"):
        return question
    words = question.split()
    if not words:
        # Empty / whitespace-only text has no first word to replace.
        return question
    words[0] = "What"
    return " ".join(words)
393
+ #----------------------------------------------------------
394
def gen_qa_what(df):
    """Generate "what" questions for each claim and verify them against evidence.

    For every row, the ``what`` cell is split on ``<sep>`` into candidate
    answers. For each candidate a question is requested through ``query``,
    forced to start with "What", and then answered from the row's
    ``evidence`` text through ``query_evidence``. The evidence answer is
    compared with the candidate via ``calc_rouge_l_score`` and mapped to a
    verification label.

    Args:
        df: DataFrame with ``claim``, ``what`` and ``evidence`` columns.

    Returns:
        A 4-tuple ``(questions, answers, statuses, evidence_answers)``. Each
        element is a list of strings with one entry per candidate that was
        processed successfully. If a row has an empty ``what`` cell, the four
        placeholder strings are returned instead.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    threshold = 0.2  # minimum ROUGE-L F-measure for 'Verified Valid'
    no_mention = "No mention of 'what'in any related documents."

    list_of_ques_what = []
    list_of_ans_what = []
    list_of_evidence_answer_what = []
    rouge_l_scores = []

    for _, row in df.iterrows():
        srl = row["what"]
        if srl == "":
            # Previously the four results were rebound to these strings, after
            # which no later row could append to them; returning right away
            # gives the same values without further API calls.
            return "No claims", "", "Not verifiable", no_mention

        claim = row["claim"]
        evidence = row["evidence"]
        # Split only this row's cell so answers from other rows never leak in.
        answers = split_ws([srl])

        for j, candidate in enumerate(answers, start=1):
            try:
                FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{candidate} [SEP] context:{claim}"""
                # NOTE(review): generation options are sent as top-level keys;
                # confirm query() forwards them the way the endpoint expects.
                question = query({"inputs": FACT_TO_GENERATE_QUESTION_FROM,
                                  "num_beams": 5,
                                  "early_stopping": True,
                                  "min_length": 100, "wait_for_model": True})[0]['generated_text'].capitalize()
                question = rephrase_question_what(question)

                input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
                answer_evidence = query_evidence({"inputs": input_evidence, "truncation": True, "wait_for_model": True})[0]['generated_text']
                if answer_evidence.lower() in evidence.lower():
                    evidence_entry = f"""Evidence{j}:{answer_evidence}"""
                else:
                    # An answer not present in the evidence is treated as absent.
                    answer_evidence = ""
                    evidence_entry = no_mention

                rouge_l_score = calc_rouge_l_score(answer_evidence, candidate)
            except Exception:
                # Best-effort: one failed request must not drop the remaining
                # candidates, but it should be visible in the logs.
                log.warning("gen_qa_what: skipping answer %r", candidate, exc_info=True)
                continue

            if rouge_l_score >= threshold:
                verification_status = 'Verified Valid'
            elif rouge_l_score == 0:
                verification_status = 'Not verifiable'
            else:
                verification_status = 'Verified False'

            # Append all four together so the result lists stay aligned.
            list_of_ques_what.append(f"""Q{j}:{question}""")
            list_of_ans_what.append(f"""Ans{j}:{candidate}""")
            list_of_evidence_answer_what.append(evidence_entry)
            rouge_l_scores.append(verification_status)

    return list_of_ques_what, list_of_ans_what, rouge_l_scores, list_of_evidence_answer_what
444
+ #----------------------------------------------------------
445
+
446
def rephrase_question_why(question):
    """Force a generated question to open with "Why".

    Args:
        question: The generated question text.

    Returns:
        ``question`` unchanged when it already starts with "why"
        (case-insensitive) or contains no words; otherwise the question with
        its first word replaced by "Why" and words re-joined by single spaces.
    """
    if question.lower().startswith("why"):
        return question
    words = question.split()
    if not words:
        # Empty / whitespace-only text has no first word to replace.
        return question
    words[0] = "Why"
    return " ".join(words)
453
+
454
+ #---------------------------------------------------------
455
def gen_qa_why(df):
    """Generate "why" questions for each claim and verify them against evidence.

    For every row, the ``why`` cell is split on ``<sep>`` into candidate
    answers. For each candidate a question is requested through ``query``,
    forced to start with "Why", and then answered from the row's ``evidence``
    text through ``query_evidence``. The evidence answer is compared with the
    candidate via ``calc_rouge_l_score`` and mapped to a verification label.

    Args:
        df: DataFrame with ``claim``, ``why`` and ``evidence`` columns.

    Returns:
        A 4-tuple ``(questions, answers, statuses, evidence_answers)``. Each
        element is a list of strings with one entry per candidate that was
        processed successfully. If a row has an empty ``why`` cell, the four
        placeholder strings are returned instead.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    threshold = 0.2  # minimum ROUGE-L F-measure for 'Verified Valid'
    no_mention = "No mention of 'why'in any related documents."

    list_of_ques_why = []
    list_of_ans_why = []
    list_of_evidence_answer_why = []
    rouge_l_scores = []

    for _, row in df.iterrows():
        srl = row["why"]
        if srl == "":
            # Previously the four results were rebound to these strings, after
            # which no later row could append to them; returning right away
            # gives the same values without further API calls.
            return "No claims", "", "Not verifiable", no_mention

        claim = row["claim"]
        evidence = row["evidence"]
        # Split only this row's cell so answers from other rows never leak in.
        answers = split_ws([srl])

        for j, candidate in enumerate(answers, start=1):
            try:
                FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{candidate} [SEP] context:{claim}"""
                # NOTE(review): generation options are sent as top-level keys;
                # confirm query() forwards them the way the endpoint expects.
                question = query({"inputs": FACT_TO_GENERATE_QUESTION_FROM,
                                  "num_beams": 5,
                                  "early_stopping": True,
                                  "min_length": 100, "wait_for_model": True})[0]['generated_text'].capitalize()
                question = rephrase_question_why(question)

                input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
                answer_evidence = query_evidence({"inputs": input_evidence, "truncation": True, "wait_for_model": True})[0]['generated_text']
                if answer_evidence.lower() in evidence.lower():
                    evidence_entry = f"""Evidence{j}:{answer_evidence}"""
                else:
                    # An answer not present in the evidence is treated as absent.
                    answer_evidence = ""
                    evidence_entry = no_mention

                rouge_l_score = calc_rouge_l_score(answer_evidence, candidate)
            except Exception:
                # Best-effort: one failed request must not drop the remaining
                # candidates, but it should be visible in the logs.
                log.warning("gen_qa_why: skipping answer %r", candidate, exc_info=True)
                continue

            if rouge_l_score >= threshold:
                verification_status = 'Verified Valid'
            elif rouge_l_score == 0:
                verification_status = 'Not verifiable'
            else:
                verification_status = 'Verified False'

            # Append all four together so the result lists stay aligned.
            list_of_ques_why.append(f"""Q{j}:{question}""")
            list_of_ans_why.append(f"""Ans{j}:{candidate}""")
            list_of_evidence_answer_why.append(evidence_entry)
            rouge_l_scores.append(verification_status)

    return list_of_ques_why, list_of_ans_why, rouge_l_scores, list_of_evidence_answer_why
504
+
505
+ #---------------------------------------------------------
506
+
507
def rephrase_question_when(question):
    """Force a generated question to open with "When".

    Args:
        question: The generated question text.

    Returns:
        ``question`` unchanged when it already starts with "when"
        (case-insensitive) or contains no words; otherwise the question with
        its first word replaced by "When" and words re-joined by single spaces.
    """
    if question.lower().startswith("when"):
        return question
    words = question.split()
    if not words:
        # Empty / whitespace-only text has no first word to replace.
        return question
    words[0] = "When"
    return " ".join(words)
514
+ #---------------------------------------------------------
515
def gen_qa_when(df):
    """Generate "when" questions for each claim and verify them against evidence.

    For every row, the ``when`` cell is split on ``<sep>`` into candidate
    answers. For each candidate a question is requested through ``query``,
    forced to start with "When", and then answered from the row's
    ``evidence`` text through ``query_evidence``. The evidence answer is
    compared with the candidate via ``calc_rouge_l_score`` and mapped to a
    verification label.

    Args:
        df: DataFrame with ``claim``, ``when`` and ``evidence`` columns.

    Returns:
        A 4-tuple ``(questions, answers, statuses, evidence_answers)``. Each
        element is a list of strings with one entry per candidate that was
        processed successfully. If a row has an empty ``when`` cell, the four
        placeholder strings are returned instead.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    threshold = 0.2  # minimum ROUGE-L F-measure for 'Verified Valid'
    no_mention = "No mention of 'when'in any related documents."

    list_of_ques_when = []
    list_of_ans_when = []
    list_of_evidence_answer_when = []
    rouge_l_scores = []

    for _, row in df.iterrows():
        srl = row["when"]
        if srl == "":
            # Previously the four results were rebound to these strings, after
            # which no later row could append to them; returning right away
            # gives the same values without further API calls.
            return "No claims", "", "Not verifiable", no_mention

        claim = row["claim"]
        evidence = row["evidence"]
        # Split only this row's cell so answers from other rows never leak in.
        answers = split_ws([srl])

        for j, candidate in enumerate(answers, start=1):
            try:
                FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{candidate} [SEP] context:{claim}"""
                # NOTE(review): generation options are sent as top-level keys;
                # confirm query() forwards them the way the endpoint expects.
                question = query({"inputs": FACT_TO_GENERATE_QUESTION_FROM,
                                  "num_beams": 5,
                                  "early_stopping": True,
                                  "min_length": 100, "wait_for_model": True})[0]['generated_text'].capitalize()
                question = rephrase_question_when(question)

                input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
                answer_evidence = query_evidence({"inputs": input_evidence, "truncation": True, "wait_for_model": True})[0]['generated_text']
                if answer_evidence.lower() in evidence.lower():
                    evidence_entry = f"""Evidence{j}:{answer_evidence}"""
                else:
                    # An answer not present in the evidence is treated as absent.
                    answer_evidence = ""
                    evidence_entry = no_mention

                rouge_l_score = calc_rouge_l_score(answer_evidence, candidate)
            except Exception:
                # Best-effort: one failed request must not drop the remaining
                # candidates, but it should be visible in the logs.
                log.warning("gen_qa_when: skipping answer %r", candidate, exc_info=True)
                continue

            if rouge_l_score >= threshold:
                verification_status = 'Verified Valid'
            elif rouge_l_score == 0:
                verification_status = 'Not verifiable'
            else:
                verification_status = 'Verified False'

            # Append all four together so the result lists stay aligned.
            list_of_ques_when.append(f"""Q{j}:{question}""")
            list_of_ans_when.append(f"""Ans{j}:{candidate}""")
            list_of_evidence_answer_when.append(evidence_entry)
            rouge_l_scores.append(verification_status)

    return list_of_ques_when, list_of_ans_when, rouge_l_scores, list_of_evidence_answer_when
564
+
565
+ #------------------------------------------------------
566
+
567
def rephrase_question_where(question):
    """Force a generated question to open with "Where".

    Args:
        question: The generated question text.

    Returns:
        ``question`` unchanged when it already starts with "where"
        (case-insensitive) or contains no words; otherwise the question with
        its first word replaced by "Where" and words re-joined by single
        spaces.
    """
    if question.lower().startswith("where"):
        return question
    words = question.split()
    if not words:
        # Empty / whitespace-only text has no first word to replace.
        return question
    words[0] = "Where"
    return " ".join(words)
574
+ #------------------------------------------------------
575
def gen_qa_where(df):
    """Generate "where" questions for each claim and verify them against evidence.

    For every row, the ``where`` cell is split on ``<sep>`` into candidate
    answers. For each candidate a question is requested through ``query``,
    forced to start with "Where", and then answered from the row's
    ``evidence`` text through ``query_evidence``. The evidence answer is
    compared with the candidate via ``calc_rouge_l_score`` and mapped to a
    verification label.

    Args:
        df: DataFrame with ``claim``, ``where`` and ``evidence`` columns.

    Returns:
        A 4-tuple ``(questions, answers, statuses, evidence_answers)``. Each
        element is a list of strings with one entry per candidate that was
        processed successfully. If a row has an empty ``where`` cell, the four
        placeholder strings are returned instead.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    threshold = 0.2  # minimum ROUGE-L F-measure for 'Verified Valid'
    no_mention = "No mention of 'where'in any related documents."

    list_of_ques_where = []
    list_of_ans_where = []
    list_of_evidence_answer_where = []
    rouge_l_scores = []

    for _, row in df.iterrows():
        srl = row["where"]
        if srl == "":
            # Previously the four results were rebound to these strings, after
            # which no later row could append to them; returning right away
            # gives the same values without further API calls.
            return "No claims", "", "Not verifiable", no_mention

        claim = row["claim"]
        evidence = row["evidence"]
        # Split only this row's cell so answers from other rows never leak in.
        answers = split_ws([srl])

        for j, candidate in enumerate(answers, start=1):
            try:
                FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{candidate} [SEP] context:{claim}"""
                # NOTE(review): generation options are sent as top-level keys;
                # confirm query() forwards them the way the endpoint expects.
                question = query({"inputs": FACT_TO_GENERATE_QUESTION_FROM,
                                  "num_beams": 5,
                                  "early_stopping": True,
                                  "min_length": 100, "wait_for_model": True})[0]['generated_text'].capitalize()
                question = rephrase_question_where(question)

                input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
                answer_evidence = query_evidence({"inputs": input_evidence, "truncation": True, "wait_for_model": True})[0]['generated_text']
                if answer_evidence.lower() in evidence.lower():
                    evidence_entry = f"""Evidence{j}:{answer_evidence}"""
                else:
                    # An answer not present in the evidence is treated as absent.
                    answer_evidence = ""
                    evidence_entry = no_mention

                rouge_l_score = calc_rouge_l_score(answer_evidence, candidate)
            except Exception:
                # Best-effort: one failed request must not drop the remaining
                # candidates, but it should be visible in the logs.
                log.warning("gen_qa_where: skipping answer %r", candidate, exc_info=True)
                continue

            if rouge_l_score >= threshold:
                verification_status = 'Verified Valid'
            elif rouge_l_score == 0:
                verification_status = 'Not verifiable'
            else:
                verification_status = 'Verified False'

            # Append all four together so the result lists stay aligned.
            list_of_ques_where.append(f"""Q{j}:{question}""")
            list_of_ans_where.append(f"""Ans{j}:{candidate}""")
            list_of_evidence_answer_where.append(evidence_entry)
            rouge_l_scores.append(verification_status)

    return list_of_ques_where, list_of_ans_where, rouge_l_scores, list_of_evidence_answer_where
624
+
625
+ #------------------------------------------------------
626
+
627
 
628
  #------------------------------------------------------------
629
 
 
631
  if evidence_text:
632
  df=claim(claim_text)
633
  df["evidence"]=evidence_text
634
+ final_df = pd.DataFrame(columns=['Who Claims', 'What Claims', 'When Claims', 'Where Claims', 'Why Claims'])
635
+ final_df["Who Claims"]=gen_qa_who(df)
636
+ final_df["What Claims"]=gen_qa_what(df)
637
+ final_df["When Claims"]=gen_qa_when(df)
638
+ final_df["Where Claims"]=gen_qa_where(df)
639
+ final_df["Why Claims"]=gen_qa_why(df)
640
+ st.dataframe(final_df)
641
  # a,b=qa_evidence(final_data)
642
  # qa_evidence(final_data)
643
  # st.json(qa_evidence(final_data))