|
import shlex
import subprocess
import tempfile
from pathlib import Path

import editdistance
import gradio as gr
from hexdump2 import hexdump
|
|
|
# Introductory text for the web UI; run() passes it to gr.Interface as the
# `description` argument.
description = """This is a space testing a method for evaluating the quality of decompilation.

Currently unhandled features:
* PIC stuff
* Global references
* Function calls
* Wildcards in target function?
* How to extract compilable decompilation from decompilers?
"""
|
|
|
def trim(str, n):
    """Return *str* with its first *n* lines dropped.

    The text is split with ``str.splitlines()`` and the remaining lines are
    re-joined with a single newline character, so original line endings and
    any trailing newline are not preserved.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so that
    # keyword callers keep working.
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
|
|
|
def trim_objdump(str):
    """Return objdump output *str* without its first seven lines.

    The seven lines are presumably objdump's banner (file format line,
    section heading and first symbol label) -- confirm against the objdump
    version in use.
    """
    leading_lines_to_drop = 7
    return trim(str, leading_lines_to_drop)
|
|
|
def disassemble_bytes(byte_data, architecture):
    """Disassemble raw machine code with ``objdump``.

    Args:
        byte_data: Bytes to disassemble; written verbatim to a scratch file.
        architecture: Value passed to objdump's ``-m`` option.

    Returns:
        objdump's standard output after ``trim_objdump`` has removed the
        leading lines. objdump's stderr and exit status are not inspected,
        so a failed run yields whatever (possibly empty) stdout it produced.
    """
    # The scratch file is deleted when the ``with`` block exits, so it no
    # longer accumulates on disk. objdump opens it by name while it is still
    # open here, hence the explicit flush before running the tool.
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        temp_bin_file.flush()

        disassembly = subprocess.run(
            ["objdump", "-D", "-b", "binary", "-m", architecture, temp_bin_file.name],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(disassembly)
|
|
|
def compile(compiler, flags, source):
    """Compile source code and extract the resulting machine code.

    The source is written to a scratch directory and compiled with
    ``<compiler> -c <source file> <flags> -o <object file>``. On success the
    ``.text`` and ``.text.*`` sections are copied out with ``objcopy`` and the
    object file is disassembled with ``objdump -d``.

    Args:
        compiler: Name or path of the compiler executable to run.
        flags: Extra compiler flags as one string; split with ``shlex.split``.
        source: Source code text; written to disk encoded with ``str.encode()``.

    Returns:
        A ``(compiled_bytes, compile_output, disassembly)`` tuple.
        ``compiled_bytes`` is the extracted section contents, or ``None`` when
        the compiler exits with a non-zero status. ``compile_output`` is the
        compiler's stdout followed by its stderr. ``disassembly`` is the
        objdump listing passed through ``trim_objdump``, or ``""`` when
        compilation failed.
    """
    # NOTE: this function shadows the builtin ``compile``; the name is kept
    # because callers depend on it.
    # NOTE(security): ``compiler`` and ``flags`` are executed as given. In this
    # file they come from free-text UI fields, so anyone who can reach the UI
    # can run an arbitrary executable on the host. Restrict or sandbox before
    # exposing this to untrusted users.

    # Every scratch file lives in one temporary directory, which is removed
    # on exit even when a tool fails or raises, so nothing is left behind.
    with tempfile.TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        c_path = scratch_dir / "source.c"
        o_path = scratch_dir / "source.o"
        raw_path = scratch_dir / "source.raw"

        c_path.write_bytes(source.encode())

        result = subprocess.run(
            [compiler, "-c", str(c_path), *shlex.split(flags), "-o", str(o_path)],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        # Without a usable object file there is nothing to extract or
        # disassemble, so skip the binutils steps entirely.
        if result.returncode != 0:
            return None, compile_output, ""

        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                str(o_path),
                str(raw_path),
            ]
        )
        # Read by path rather than through a handle opened before objcopy
        # ran; fall back to empty bytes if objcopy produced no output file.
        compiled_bytes = raw_path.read_bytes() if raw_path.exists() else b""

        disassembly = subprocess.run(
            ["objdump", "-d", str(o_path)],
            capture_output=True,
            text=True,
        ).stdout

    return compiled_bytes, compile_output, trim_objdump(disassembly)
|
|
|
|
|
|
|
|
|
|
|
def predict(target_bytes, source, compiler, flags, architecture):
    """Compile *source* and compare the result against the target bytes.

    Args:
        target_bytes: Target machine code as a hex string (parsed with
            ``bytes.fromhex``).
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flag string handed to ``compile``.
        architecture: Architecture name handed to ``disassemble_bytes``.

    Returns:
        A 6-tuple: hexdump of the compiled bytes (or the text
        ``"Compilation failed"``), hexdump of the target bytes, the edit
        distance between the two byte strings (``-1`` when compilation
        failed), the compiler output, the compiled disassembly and the target
        disassembly.
    """
    target = bytes.fromhex(target_bytes)
    built, compiler_log, built_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(target, architecture)
    target_dump = hexdump(target, result="return")

    # ``compile`` signals a failed build by returning None for the bytes.
    if built is None:
        return (
            "Compilation failed",
            target_dump,
            -1,
            compiler_log,
            built_asm,
            target_asm,
        )

    built_dump = hexdump(built, result="return")
    distance = editdistance.eval(built, target)
    return (
        built_dump,
        target_dump,
        distance,
        compiler_log,
        built_asm,
        target_asm,
    )
|
|
|
|
|
def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The five input widgets map, in order, onto the parameters of ``predict``
    and the six output widgets onto the tuple it returns. The server is
    launched on host 0.0.0.0, port 7860, with ``show_error=True``.
    """
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int foo() { return 0; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (for disassembler)", value="i386"),
    ]

    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.Textbox(label="Target Disassembly"),
    ]

    app = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
|
|
|
|
# Start the web UI. This call is at module level with no __main__ guard, so
# it also runs whenever this module is imported.
run()
|
|