File size: 4,199 Bytes
97aff81
3606254
 
 
 
 
 
 
6425dcc
3606254
 
 
 
 
 
 
 
 
 
d7cb910
6425dcc
 
d7cb910
 
3606254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
from frame_semantic_transformer import FrameSemanticTransformer
from transformers import T5ForConditionalGeneration, T5TokenizerFast
from frame_semantic_transformer.data.frame_types import Frame
from frame_semantic_transformer.data.loaders.loader import InferenceLoader
import pandas as pd
import ast
import re
import os
from nltk.stem import SnowballStemmer

# Hugging Face Hub identifier of the French frame-semantic model.
huggingface_model_path = "nelsonjq/frame-semantic-transformer-french-small"

# Instantiate the T5 model and its fast tokenizer from that identifier.
# NOTE(review): neither `model` nor `tokenizer` is referenced again in the
# visible source (FrameSemanticTransformer is given the path instead) --
# confirm whether this extra load is still needed.
model = T5ForConditionalGeneration.from_pretrained(
    huggingface_model_path
)
tokenizer = T5TokenizerFast.from_pretrained(
    huggingface_model_path
)

# Download the frame / lexical-unit table with wget and load it as a DataFrame.

import subprocess
os.makedirs("Asfalda", exist_ok=True)
# check=True turns a non-zero wget exit status into CalledProcessError right
# here, instead of letting read_csv run against whatever wget left behind.
subprocess.run(
    [
        'wget',
        '--output-document=Asfalda/frame_lus_df.tsv',
        'https://seafile.unistra.fr/f/0155ced00b8d441eb131/?dl=1',
    ],
    check=True,
)
frame_lus_df = pd.read_csv("Asfalda/frame_lus_df.tsv", delimiter='\t')

# Drop the 'Other_sense' rows. The explicit copy() makes the filtered result
# an independent DataFrame, so the column assignment below is not a write
# into a selection of the original table (the pattern that triggers
# SettingWithCopyWarning).
frame_lus_df = frame_lus_df[frame_lus_df['Name'] != 'Other_sense'].copy()

# Normalize frame names: keep the text before the first '.', then strip a
# leading 'FRV_' prefix in any letter case.
frame_lus_df['Name'] = (
    frame_lus_df['Name']
    .apply(lambda name: name.split(".")[0])
    .str.replace(r'^[Ff][Rr][Vv]_', '', regex=True)
)

# Two extra frames, 'Suasion' and 'Arriving', appended to the table.
# List-valued fields are stored as Python-literal strings; extract_frame
# parses 'Core_Elms', 'Non_Core_Elms' and 'Lus' with ast.literal_eval.
new_row_Suasion = {
    'Name': 'Suasion',
    'Core_Elms': "['Content', 'Cognizer', 'Persuader', 'Target', 'Text', 'Action', 'Addressee']",
    'Non_Core_Elms': "['Speaker', 'Topic']",
    'Lus': "['convaincre.v', 'convertir.v', 'persuader.v', 'convaincant.a', 'persuasion.n', 'dissuader.v', 'apprendre.v', 'dissuasion.n', 'décider.v']",
    'Lus_simple': "['convaincre', 'convertir', 'persuader', 'convaincant', 'persuasion', 'dissuader', 'apprendre', 'dissuasion', 'décider']",
}
new_row_Arriving = {
    'Name': 'Arriving',
    'Core_Elms': "['Theme', 'Goal']",
    'Non_Core_Elms': "['Target', 'Means']",
    'Lus': "['gagner.v']",
    'Lus_simple': "['gagner']",
}

# Wrap each record in its own single-row DataFrame.
new_row_Suasion_df, new_row_Arriving_df = (
    pd.DataFrame([record]) for record in (new_row_Suasion, new_row_Arriving)
)

# Append both rows after the existing ones and renumber the index.
frame_lus_df = pd.concat(
    [frame_lus_df, new_row_Suasion_df, new_row_Arriving_df],
    ignore_index=True,
)

# Snowball stemmer for French; FrenchInferenceLoader uses it when normalizing
# lexical units.
french_stemmer = SnowballStemmer(language="french")

def extract_frame(df_row_frame) -> Frame:
    """Build a ``Frame`` from one row of the frame table.

    The row is indexed by the keys 'Name', 'Core_Elms', 'Non_Core_Elms' and
    'Lus'. The last three hold Python list literals as strings and are parsed
    with ``ast.literal_eval``.
    """
    frame_name = df_row_frame['Name']
    core, non_core, lexical_units = (
        ast.literal_eval(df_row_frame[column])
        for column in ('Core_Elms', 'Non_Core_Elms', 'Lus')
    )
    return Frame(
        name=frame_name,
        core_elements=core,
        non_core_elements=non_core,
        lexical_units=lexical_units,
    )

class FrenchInferenceLoader(InferenceLoader):
    """Inference loader backed by a table of French frames.

    Every row of the table is converted to a ``Frame`` with ``extract_frame``
    when the loader is constructed; ``load_frames`` returns those frames.
    """

    def __init__(self, french_framenet_df_file):
        """Build the frame list from *french_framenet_df_file*.

        Despite its name, the argument is iterated with ``iterrows()``, so it
        must be a DataFrame-like object rather than a file path.
        """
        self.frames = [
            extract_frame(row) for _, row in french_framenet_df_file.iterrows()
        ]

    def load_frames(self):
        """Return the frames built in ``__init__``."""
        return self.frames

    def normalize_lexical_unit_text(self, lu: str) -> str:
        """Return the lower-cased, cleaned and stemmed form of *lu*.

        Steps: lower-case, keep only the text before the first '.', remove
        every character that is not a letter, digit or space, then apply the
        French Snowball stemmer.
        """
        normalized_lu = lu.lower()
        # Keep the part before the first '.', e.g. "gagner.v" -> "gagner".
        # split() returns the whole string when there is no '.'.
        normalized_lu = normalized_lu.split('.')[0]
        # \w is Unicode-aware for str patterns, so accented letters survive.
        # The previous ASCII-only class [^a-z0-9 ] deleted them ("décider"
        # became "dcider") before stemming. '_' is removed explicitly because
        # \w would otherwise keep it, unlike the original filter.
        normalized_lu = re.sub(r"[^\w ]|_", "", normalized_lu)
        return french_stemmer.stem(normalized_lu)

# Build the loader from the prepared frame table, then create the
# FrameSemanticTransformer on the same model path with that loader plugged in.
inference_loader = FrenchInferenceLoader(french_framenet_df_file=frame_lus_df)
transformer = FrameSemanticTransformer(
    huggingface_model_path,
    inference_loader=inference_loader,
)

# Gradio callback: run frame detection on a sentence and format the result.
def detect_frames_in_text(input_text):
    """Return a plain-text report of the frames detected in *input_text*.

    The report starts with the sentence from the detection result, followed
    by one section per detected frame listing each frame element's name and
    text.
    """
    detection = transformer.detect_frames(input_text)
    pieces = [f"Results found in the sentence:\n\n{detection.sentence}\n"]
    for detected in detection.frames:
        pieces.append(f"\nFRAME: {detected.name}\n\nFrame Elements:\n")
        pieces.extend(
            f"\t\t{element.name}: {element.text}\n"
            for element in detected.frame_elements
        )
    return "".join(pieces)

# Text-in / text-out Gradio UI wired to detect_frames_in_text.
iface = gr.Interface(
    title="French Frame Detection App",
    description="Enter a French sentence to detect frames and frame elements using the FrameSemanticTransformer model.",
    fn=detect_frames_in_text,
    inputs="text",
    outputs="text",
)

# Start serving the interface. This runs whenever the module is executed,
# because there is no __main__ guard around it.
iface.launch()