|
import gradio as gr |
|
import requests |
|
import json |
|
import time |
|
import random |
|
import re |
|
import os |
|
|
|
# Tag code -> (Chinese display label, English label).
# The Chinese label (index 0) is what the prompt builder and the highlighter use.
elements = {
    # LEO_* tags (presumably "legal elements of the offence" -- confirm).
    'LEO_SOC': ('犯罪主體', 'Subject of Crime'),
    'LEO_VIC': ('客體', 'Victim'),
    'LEO_ACT': ('不法行為', 'Behavior'),
    'LEO_SLE': ('主觀要件', 'Subjective Legal Element of the Offense'),
    'LEO_CAU': ('因果關係', 'Causation'),
    'LEO_ROH': ('危害結果', 'Result of Hazard'),
    'LEO_ATP': ('未遂', 'Attempted'),
    'LEO_ACC': ('既遂', 'Accomplished'),
    'LEO_ABA': ('中止', 'Abandonment'),
    'LEO_PRP': ('預備', 'Preparation'),
    # ILG_* tags (presumably illegality / justification grounds -- confirm).
    'ILG_GLJ': ('阻卻違法事由', 'Ground of Legal Justification'),
    'ILG_SDE': ('正當防衛', 'Self-Defense'),
    'ILG_NEC': ('緊急避難', 'Emergency Avoidance'),
    # CUL_* tags (presumably culpability -- confirm).
    'CUL_INS': ('心神喪失', 'Insane'),
    'CUL_FBD': ('精神耗弱', 'Feebleminded'),
    'CUL_ALC': ('原因自由行為', 'Actio Libera in Causa'),
    'CUL_RPS': ('責任能力', 'Responsibility'),
    'CUL_ANP': ('期待可能性', 'Anticipated Possibility'),
    'CUL_GUM': ('犯罪意識', 'Guilty Mind'),
}

# Deployment settings read from the environment; both are None when unset.
secret = os.getenv("HF_TOKEN")
llm_server = os.getenv("REMOTE_LLM_SERVER")
|
|
|
def get_prompt(content, tag, tag_name):
    """Build the annotation prompt sent to the tagging model.

    Parameters:
        content (str): The paragraph to be annotated.
        tag (str): Tag code, without brackets (it is wrapped in ``[...]`` here).
        tag_name (str): Human-readable name of the element, inserted into the
            instruction sentence.

    Returns:
        str: The instruction sentence, then the paragraph fenced by ``#####``
        lines, then the answer header the model is expected to continue from.
    """
    instruction = f"請標示出以下段落中的{tag_name}構成要件要素,對應的標籤是[{tag}]。\n\n"
    return f'{instruction}#####\n{content}\n\n#####\n構成要件要素:\n'
|
|
|
def Jaccard(response_content, tag):
    """Report whether a model response contains text annotated with ``tag``.

    Despite the name, no Jaccard index is computed: the function only checks
    for the presence of ``[tag]...[/tag]`` spans in the annotated section.

    Parameters:
        response_content (str): Model output that may contain tags. Only the
            text between the first and second "標註結果:\\n" markers (or up to
            the end when there is a single marker) is inspected; when the
            marker is missing the body falls back to the literal 'None'.
        tag (str): Tag name to look for, without square brackets.

    Returns:
        tuple: ``(tag, True)`` when at least one complete ``[tag]...[/tag]``
        span exists and the annotated section is non-empty once the markers
        of those spans are removed; otherwise ``(tag, None)``.

    An open tag without a matching close tag is not counted as a span and
    never raises.
    """
    sections = response_content.split("標註結果:\n")
    response_body = sections[1] if len(sections) > 1 else 'None'

    # Non-greedy so each open tag pairs with the nearest following close tag;
    # DOTALL because an annotated span may run across line breaks.
    span_pattern = re.compile(
        re.escape(f"[{tag}]") + "(.*?)" + re.escape(f"[/{tag}]"),
        re.DOTALL,
    )
    if span_pattern.search(response_body) is None:
        return (tag, None)

    # Same text the span-by-span reassembly would produce: the body with the
    # markers of every complete span stripped out.
    untagged_text = span_pattern.sub(lambda span: span.group(1), response_body)
    return (tag, True) if untagged_text else (tag, None)
|
|
|
def tag_in_color(tag_content, tag):
    """Split an annotated response into labelled and unlabelled text segments.

    Parameters:
        tag_content (str): Model output containing ``[tag]...[/tag]`` markup.
            Only the text between the first and second "標註結果:\\n" markers
            (or up to the end when there is a single marker) is used; when the
            marker is missing the body falls back to the literal 'None'.
        tag (str): Tag name in English, without brackets. It must be a key of
            the module-level ``elements`` mapping whenever a span is found.

    Returns:
        list[tuple[str, str | None]]: Segments in document order. Text inside
        a complete ``[tag]...[/tag]`` span is paired with ``elements[tag][0]``;
        text before, between and after spans is paired with ``None`` (these
        segments are emitted even when empty). An empty list is returned when
        no complete span exists.

    An open tag without a matching close tag is left in the surrounding plain
    text and never raises.
    """
    sections = tag_content.split("標註結果:\n")
    response_body = sections[1] if len(sections) > 1 else 'None'

    # Non-greedy so each open tag pairs with the nearest following close tag;
    # DOTALL because an annotated span may run across line breaks.
    span_pattern = re.compile(
        re.escape(f"[{tag}]") + "(.*?)" + re.escape(f"[/{tag}]"),
        re.DOTALL,
    )
    spans = list(span_pattern.finditer(response_body))
    if not spans:
        return []

    # Looked up only once a span exists, so an unknown tag with no matches
    # still yields [] instead of a KeyError.
    label = elements[tag][0]

    parts = []
    cursor = 0
    for span in spans:
        parts.append((response_body[cursor:span.start()], None))
        parts.append((span.group(1), label))
        cursor = span.end()
    parts.append((response_body[cursor:], None))
    return parts
|
|
|
def le_tagger(content, tag):
    """Ask the remote LLM server to annotate ``content`` with one element tag.

    The base URL is read from the ``REMOTE_LLM_SERVER`` environment variable;
    the prompt is POSTed to ``<base>/generate`` and the ``"response"`` field of
    the JSON reply is split into highlighted segments by ``tag_in_color``.

    Parameters:
        content (str): The paragraph (statement of criminal facts) to annotate.
        tag (str): Tag code; must be a key of the module-level ``elements``.

    Returns:
        list[tuple[str, str | None]]: ``(text, label)`` segments as produced by
        ``tag_in_color``.

    Raises:
        gr.Error: When the server URL is not configured, the tag is unknown,
            the server cannot be reached, it answers with a non-200 status, or
            its reply cannot be parsed.
    """
    api_base_url = os.environ.get("REMOTE_LLM_SERVER")
    if not api_base_url:
        raise gr.Error("REMOTE_LLM_SERVER is not configured.")
    if tag not in elements:
        raise gr.Error(f"Unknown tag: {tag}")

    url = f"{api_base_url}/generate"
    list_url = f"{api_base_url}/tags"
    headers = {
        "Content-Type": "application/json"
    }
    # Upper bound in seconds so a hung server cannot block the handler forever.
    request_timeout = 300

    ner_model_name = 'gemma2-ner:2b'
    payload = {
        "model": f"{ner_model_name}",
        "prompt": get_prompt(content, tag, elements[tag][0]),
        "options": {
            "seed": 60,
            "top_k": 50,
            "top_p": 0.7,
            "temperature": 0.9
        },
        "stream": False
    }

    started_at = time.time()
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        raise gr.Error(f"Error: cannot reach the LLM server ({exc})") from exc

    if response.status_code == 200:
        print(f'構成要件要素標籤: {tag}\n構成要件要素:')
        try:
            actual_response = json.loads(response.text)["response"]
        except (ValueError, KeyError, TypeError) as exc:
            raise gr.Error(f"Error: unexpected reply from the LLM server ({exc})") from exc
        print(f"{actual_response}\n")
        color_result = tag_in_color(actual_response, tag)
        print(color_result)
        print(f"--- 執行耗時:{(time.time() - started_at)}秒 ---\n")
        return color_result

    # Non-200 reply: pull the server's error text when the body is JSON,
    # otherwise fall back to the raw body.
    try:
        error_body = json.loads(response.text)
        error_detail = error_body["error"]
    except (ValueError, KeyError, TypeError):
        error_detail = response.text
    print(f"Error:{response.status_code} {error_detail}")

    # Best-effort diagnostic: log the model names the server reports, which
    # helps when the failure is a wrong or missing model name.
    try:
        listing = requests.get(list_url, headers=headers, timeout=request_timeout)
        for model in json.loads(listing.text)["models"]:
            print(model['name'])
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        print(f"Error: could not list models ({exc})")
    print(f"--- 執行耗時:{(time.time() - started_at)}秒 ---\n")

    raise gr.Error(f"Error:{response.status_code} {error_detail}")
|
|
|
# Sample statements of criminal facts offered as one-click inputs in the UI.
examples = [
    # Example 1
    "錢旺來雖明知不法犯罪集團經常使用人頭帳戶,向被害人施用詐術,藉此逃避檢警人員之追緝,並預見向其取得帳戶之人,會以該帳戶作為詐欺取財之不法所用,竟仍基於幫助詐欺之犯意,於民國106年1月11日某時,將其所申辦之上海商業銀行(下稱上海商銀)帳號00000000000000號帳戶之提款卡、密碼,以宅急便之方式,寄送予某真實姓名年籍不詳之詐欺集團成員,供該詐欺集團成員用於詐欺取財之犯行。嗣該詐欺集團成員即意圖為自己不法之所有,於106年1月13日18時許,撥打電話予洪菁霞,向洪菁霞佯稱係其友人,急需資金周轉,致洪菁霞陷於錯誤,於同日14時35分許,至台中市○區○○路000號之郵局,臨櫃匯款新臺幣(下同)10萬元至錢旺來前揭上海商銀帳戶內,隨即遭提領一空。",
    # Example 2
    "梅友虔明知金融帳戶之存摺、提款卡及密碼係供自己使用之重要理財工具,關係個人財產、信用之表徵,並可預見一般人取得他人金融帳戶使用常與財產犯罪密切相關,且取得他人帳戶之目的在於掩飾犯罪所得之財物或財產上利益不易遭人追查,對於提供帳戶存摺、提款卡及密碼雖無引發他人萌生犯罪之確信,但仍以縱若有人持以犯罪,亦無違反其本意之幫助詐欺之不確定故意,於民國104年11月12日某時,在桃園市中壢區某便利商店內,將其所申辦之臺灣銀行中壢分行帳號000000000000號帳戶(下稱臺灣銀行帳戶)之存摺、提款卡及密碼,以宅急便方式寄送至高雄市○○區○○○路000號予「黃冠智」之成年人使用,容認該「黃冠智」得以使用作為詐欺取財之工具,並以此方式幫助其從事詐欺取財之犯行。迨「黃冠智」取得上開帳戶存摺、提款卡及密碼後,即基於意圖為自己不法所有之詐欺取財犯意,分別於附表所示時間撥打電話予謝家富、陳品蓁,佯以附表所示情節,致謝家富、陳品蓁均陷於錯誤,而分別於附表所示匯款時間,匯款如附表所示金額至上開帳戶內,並旋遭提領一空。案經謝家富、陳品蓁訴由桃園市政府警察局中壢分局移送臺灣桃園地方法院檢察署檢察官偵查後聲請簡易判決處刑。",
]
|
|
|
# Build the Gradio page: a facts textbox and a tag selector, the highlighted
# result, a trigger button, and the clickable examples.
# NOTE(review): the container nesting below was reconstructed from statement
# order -- confirm it matches the intended layout.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.indigo,
        secondary_hue=gr.themes.colors.fuchsia,
    )
) as demo:
    gr.Markdown(
        """
<h1 style="text-align: center;">構成要件要素標註器</h1>
""")

    with gr.Row() as row:
        with gr.Column():
            content = gr.Textbox(
                label="犯罪事實",
                placeholder="輸入「犯罪事實」....,或是選取以下範例。",
            )
            # Each choice is (label shown to the user, tag code passed to le_tagger).
            radio = gr.Radio(
                choices=[
                    ("犯罪主體", "LEO_SOC"),
                    ("主觀要件", "LEO_SLE"),
                    ("不法行為", "LEO_ACT"),
                    ('客體', 'LEO_VIC'),
                    ("因果關係", "LEO_CAU"),
                    ("危害結果", "LEO_ROH"),
                ],
                value="LEO_SOC",
                label="請選擇構成要件要素標籤",
            )

    with gr.Row():
        output = gr.HighlightedText(label="標註結果")

    with gr.Row():
        greet_btn = gr.Button("LE-NER", variant="primary")

    # The button sends (facts, tag code) to le_tagger and renders its segments.
    greet_btn.click(
        fn=le_tagger,
        inputs=[content, radio],
        outputs=output,
        api_name="le_tagger",
    )
    gr.Examples(examples, label='Examples', inputs=[content])

demo.launch(share=False)