Spaces:
Sleeping
Sleeping
import json
import os
import shlex
import subprocess
import tempfile

import editdistance
import frontmatter
import gradio as gr
from hexdump2 import hexdump
# Body text of README.md (the `.content` of the parsed front-matter document).
# Loaded once at import time from a relative path, so README.md must be present
# in the current working directory. Passed to gr.Interface as its description.
description = frontmatter.load("README.md").content
def trim(str, n):
    """Return ``str`` with its first ``n`` lines removed.

    The text is split with ``splitlines()`` and the surviving lines are
    re-joined with a single newline character, so the original line endings
    (and any trailing newline) are not preserved.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Strip the leading lines of captured ``objdump`` output.

    Exactly seven lines are removed -- presumably the banner objdump prints
    before the listing itself (TODO confirm against the objdump version in use).
    """
    leading_lines_to_drop = 7
    return trim(str, leading_lines_to_drop)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble a blob of raw machine code with ``objdump``.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's standard output with its leading lines removed by
        ``trim_objdump``. objdump's exit status and stderr are not inspected,
        so a failed run simply yields whatever (possibly empty) text was
        printed to stdout.
    """
    # The scratch file is deleted automatically when the ``with`` block exits,
    # so repeated requests no longer accumulate stray ``.bin`` files.
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # objdump opens the file by name; make sure the bytes are on disk first.
        temp_bin_file.flush()
        disassembly = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        ).stdout
    return trim_objdump(disassembly)
def compile(compiler, flags, source):
    """Compile C source to an object file and extract what the UI displays.

    Args:
        compiler: Name or path of the compiler executable.
        flags: Extra command-line flags as one string; split with
            ``shlex.split`` before being passed to the compiler.
        source: The source code text to compile.

    Returns:
        A 4-tuple ``(relocations, compiled_bytes, compile_output, disassembly)``:

        * ``relocations`` -- list of relocation entries reported by
          ``llvm-readobj-19``, excluding those whose symbol is named ``.text``.
        * ``compiled_bytes`` -- raw contents of the ``.text`` / ``.text.*``
          sections as extracted by ``objcopy``.
        * ``compile_output`` -- the compiler's stdout followed by its stderr.
        * ``disassembly`` -- trimmed ``objdump -d`` output for the object file.

        If the compiler exits with a non-zero status the result is
        ``(None, None, compile_output, "")``.
    """
    # Every intermediate file lives in one scratch directory that is removed
    # on exit, so nothing is left behind regardless of which step fails.
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, "source.c")
        o_path = os.path.join(work_dir, "source.o")
        raw_path = os.path.join(work_dir, "source.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file.
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", o_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        # Stop here on failure: without a valid object file the tools below
        # have nothing to work on, and parsing their output would raise
        # instead of reporting the compiler diagnostics to the caller.
        if result.returncode != 0:
            return None, None, compile_output, ""

        # Extract the raw code bytes.
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                o_path,
                raw_path,
            ]
        )
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # objcopy produced no output file; report that as "no bytes".
            compiled_bytes = b""

        # Disassemble the object file.
        disassembly = trim_objdump(
            subprocess.run(
                ["objdump", "-d", o_path],
                capture_output=True,
                text=True,
            ).stdout
        )

        # Relocations, as JSON.
        readobj_stdout = subprocess.run(
            [
                "llvm-readobj-19",
                "--elf-output-style=JSON",
                "--relocations",
                o_path,
            ],
            capture_output=True,
            text=True,
        ).stdout

    relocation_sections = json.loads(readobj_stdout)[0]["Relocations"]
    json_relocs = [
        entry["Relocation"]
        for section in relocation_sections
        for entry in section["Relocs"]
    ]
    # Filter out .text
    json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]

    return json_relocs, compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile ``source`` and compare the result against the target bytes.

    ``target_bytes`` is a hex string (parsed with ``bytes.fromhex``). The
    source is compiled with ``compile`` and the target bytes are disassembled
    with ``disassemble_bytes``.

    Returns a 7-tuple, in this order: hexdump of the compiled bytes (or the
    text "Compilation failed"), hexdump of the target bytes, edit distance
    between the two byte strings (or -1 when compilation failed), compiler
    output, compiled disassembly, compiled relocations, target disassembly.
    """
    target = bytes.fromhex(target_bytes)
    relocs, built, build_log, built_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(target, disasm_arch, disasm_options)

    if built is None:
        # compile() signals failure by returning None for the bytes.
        built_dump = "Compilation failed"
        target_dump = hexdump(target, result="return")
        distance = -1
    else:
        built_dump = hexdump(built, result="return")
        target_dump = hexdump(target, result="return")
        distance = editdistance.eval(built, target)

    return (
        built_dump,
        target_dump,
        distance,
        build_log,
        built_asm,
        relocs,
        target_asm,
    )
def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The server listens on 0.0.0.0:7860 with ``show_error`` enabled.
    """
    # Input widgets, in the same order as predict()'s parameters.
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    # Output widgets, in the same order as the tuple predict() returns.
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Build and launch the interface as soon as this module is loaded.
run()