nkasmanoff committed on
Commit
50526d9
·
1 Parent(s): 04826ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -3
app.py CHANGED
@@ -2,9 +2,8 @@ import re
2
  import gradio as gr
3
  from PIL import Image
4
  from transformers import AutoProcessor, AutoModelForCausalLM
5
- import sys,os
6
- sys.path.append(os.getcwd())
7
- from knowledge_extraction import get_entities, get_relations
8
  device='cpu'
9
 
10
  processor = AutoProcessor.from_pretrained("microsoft/git-base")
@@ -24,6 +23,84 @@ def predict(image,max_length=64,device='cpu'):
24
  return knowlege_triplet
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  input = gr.inputs.Image(label="Please upload a remote sensing image", type = 'pil', optional=True)
28
  output = gr.outputs.Textbox(type="text",label="Captions")
29
 
 
2
  import gradio as gr
3
  from PIL import Image
4
  from transformers import AutoProcessor, AutoModelForCausalLM
5
+ import spacy
6
+ from spacy.matcher import Matcher
 
7
  device='cpu'
8
 
9
  processor = AutoProcessor.from_pretrained("microsoft/git-base")
 
23
  return knowlege_triplet
24
 
25
 
26
def _entity_pipeline():
    """Return the spaCy pipeline that ``get_entities`` parses with.

    A module-level ``nlp`` object is used when the file defines one.
    Otherwise ``en_core_web_sm`` is loaded on first use and kept on this
    function so later calls do not reload the model.
    """
    shared = globals().get("nlp")
    if shared is not None:
        return shared
    if not hasattr(_entity_pipeline, "_cached"):
        _entity_pipeline._cached = spacy.load("en_core_web_sm")
    return _entity_pipeline._cached


def get_entities(sent):
    """Extract a subject entity and an object entity from a sentence.

    The sentence is parsed with spaCy and its tokens are scanned left to
    right. Compound words and modifiers seen before a subject/object token
    are attached to that token to form a multi-word entity.

    Args:
        sent: The sentence text to parse.

    Returns:
        A two-item list ``[subject_entity, object_entity]``. Either item is
        an empty string when no matching token was found. When several
        tokens qualify, the last one wins.
    """
    ent1 = ""
    ent2 = ""

    prv_tok_dep = ""   # dependency tag of the previous non-punctuation token
    prv_tok_text = ""  # text of the previous non-punctuation token

    prefix = ""
    modifier = ""

    for tok in _entity_pipeline()(sent):
        # Punctuation carries no entity information and must not become the
        # "previous token" either.
        if tok.dep_ == "punct":
            continue

        # Compound word: remember it, merged with a directly preceding
        # compound so multi-word names stay together.
        if tok.dep_ == "compound":
            prefix = tok.text
            if prv_tok_dep == "compound":
                prefix = prv_tok_text + " " + tok.text

        # Modifier (any dependency label ending in "mod"): remember it,
        # merged with a directly preceding compound.
        if tok.dep_.endswith("mod"):
            modifier = tok.text
            if prv_tok_dep == "compound":
                modifier = prv_tok_text + " " + tok.text

        # Subject: substring membership rather than ``find(...) == True``,
        # which only held when "subj" happened to start at index 1.
        if "subj" in tok.dep_:
            # Join only the non-empty parts so no double spaces appear.
            ent1 = " ".join(p for p in (modifier, prefix, tok.text) if p)
            # The collected words belong to the subject; start afresh.
            prefix = ""
            modifier = ""

        # Object: same membership test. Prefix/modifier are intentionally
        # left as they are here, matching the original flow.
        if "obj" in tok.dep_:
            ent2 = " ".join(p for p in (modifier, prefix, tok.text) if p)

        prv_tok_dep = tok.dep_
        prv_tok_text = tok.text

    return [ent1.strip(), ent2.strip()]
76
+
77
+
78
+
79
+
80
def _relation_tools():
    """Return the ``(pipeline, matcher)`` pair used by ``get_relation``.

    Both objects are built on first use and kept on this function, so the
    spaCy model is not reloaded for every sentence.
    """
    if not hasattr(_relation_tools, "_cached"):
        pipeline = spacy.load('en_core_web_sm')
        matcher = Matcher(pipeline.vocab)

        # The root of the parse, optionally followed by a preposition, an
        # agent token and an adjective.
        pattern = [{'DEP': 'ROOT'},
                   {'DEP': 'prep', 'OP': "?"},
                   {'DEP': 'agent', 'OP': "?"},
                   {'POS': 'ADJ', 'OP': "?"}]
        matcher.add('matching_pattern', patterns=[pattern])

        _relation_tools._cached = (pipeline, matcher)
    return _relation_tools._cached


def get_relation(sent):
    """Extract the relation phrase of a sentence.

    The sentence is parsed with spaCy and matched against a pattern anchored
    on the dependency root. The text of the last match is returned.

    Args:
        sent: The sentence text to parse.

    Returns:
        The text of the last matched span, or an empty string when the
        pattern does not match (for example for an empty sentence).
    """
    pipeline, matcher = _relation_tools()
    doc = pipeline(sent)

    matches = matcher(doc)
    if not matches:
        # Nothing matched; indexing the last match would raise IndexError.
        return ""

    # Each match is (match_id, start, end); use the last one reported.
    _, start, end = matches[-1]
    return doc[start:end].text
101
+
102
+
103
+
104
  input = gr.inputs.Image(label="Please upload a remote sensing image", type = 'pil', optional=True)
105
  output = gr.outputs.Textbox(type="text",label="Captions")
106