# ejschwartz's picture
# Add a reloc
# 1d30af4
import json
import os
import shlex
import subprocess
import tempfile

import editdistance
import frontmatter
import gradio as gr
from hexdump2 import hexdump
# Content of README.md as parsed by python-frontmatter; run() passes it to
# gr.Interface as the app description.
description = frontmatter.load("README.md").content
def trim(str, n):
    """Return *str* without its first *n* lines.

    The text is split with ``str.splitlines()`` and the remaining lines are
    re-joined with ``"\\n"``, so the original line endings (and any trailing
    newline) are not preserved.

    The first parameter keeps its original name even though it shadows the
    builtin, so that keyword callers continue to work.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Return objdump output with its first seven lines removed.

    NOTE(review): seven is presumably the length of the banner objdump prints
    before the first instruction -- confirm against the objdump version used.
    """
    header_line_count = 7
    return trim(str, header_line_count)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble a raw byte string with objdump.

    Args:
        byte_data: Raw bytes to disassemble.
        architecture: Value passed to ``objdump -m`` (for example ``i386``).
        options: Value passed to ``objdump -M`` (for example ``x86-64``).

    Returns:
        The objdump listing with its header removed by ``trim_objdump``. If
        objdump exits with a non-zero status, its error output is returned
        instead so the caller can see why nothing was disassembled.
    """
    # The temporary file is deleted automatically when the ``with`` block
    # ends, so objdump has to run while the file is still open.
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # Push the bytes out of Python's buffer before objdump reads the file.
        temp_bin_file.flush()
        result = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        )

    if result.returncode != 0:
        return result.stderr
    return trim_objdump(result.stdout)
def compile(compiler, flags, source):
    """Compile C source and collect details about the resulting object file.

    All intermediate files live in a temporary directory that is removed
    before the function returns.

    Args:
        compiler: Compiler executable to run (for example ``g++``).
        flags: Extra compiler flags as a single string; split with
            ``shlex.split``.
        source: C source code to compile.

    Returns:
        A tuple ``(relocs, compiled_bytes, compile_output, disassembly)``.

        ``compile_output`` is always the compiler's stdout followed by its
        stderr.  When the compiler succeeds, ``relocs`` is the list of
        relocation records reported by llvm-readobj (excluding those whose
        symbol is named ``.text``), ``compiled_bytes`` is what objcopy
        extracted from the ``.text`` / ``.text.*`` sections, and
        ``disassembly`` is the trimmed ``objdump -d`` listing.  When the
        compiler exits with a non-zero status, ``relocs`` and
        ``compiled_bytes`` are ``None`` and ``disassembly`` is empty.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, "source.c")
        o_path = os.path.join(work_dir, "source.o")
        raw_path = os.path.join(work_dir, "source.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", o_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        # Stop here on failure: there may be no usable object file, and the
        # tools below (in particular the JSON parsing) would raise instead of
        # letting the caller report the compiler's output.
        if result.returncode != 0:
            return None, None, compile_output, ""

        # Extract the raw bytes of the code sections
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                o_path,
                raw_path,
            ]
        )
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # objcopy produced no output file; treat that as "no code bytes".
            compiled_bytes = b""

        # Disassemble the object file
        disassembly = trim_objdump(
            subprocess.run(
                ["objdump", "-d", o_path],
                capture_output=True,
                text=True,
            ).stdout
        )

        # Relocations come from llvm-readobj's JSON output rather than from
        # parsing the text printed by ``objdump -r``.
        readobj_output = subprocess.run(
            ["llvm-readobj-19", "--elf-output-style=JSON", "--relocations", o_path],
            capture_output=True,
            text=True,
        ).stdout

    reloc_sections = json.loads(readobj_output)[0]["Relocations"]
    relocs = [
        entry["Relocation"]
        for section in reloc_sections
        for entry in section["Relocs"]
        # Filter out .text
        if entry["Relocation"]["Symbol"]["Name"] != ".text"
    ]
    return relocs, compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile *source* and compare the result with the target bytes.

    Args:
        target_bytes: Target function as a hex string (parsed with
            ``bytes.fromhex``).
        source: C source code to compile.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 7-tuple: hexdump of the compiled bytes (or ``"Compilation failed"``),
        hexdump of the target bytes, edit distance between the two byte
        strings (or ``-1`` when compilation failed), compiler output,
        compiled disassembly, compiled relocations, and target disassembly.
    """
    target = bytes.fromhex(target_bytes)
    relocs, built, compiler_log, built_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(target, disasm_arch, disasm_options)

    if built is None:
        built_dump = "Compilation failed"
        distance = -1
    else:
        built_dump = hexdump(built, result="return")
        distance = editdistance.eval(built, target)

    target_dump = hexdump(target, result="return")
    return (
        built_dump,
        target_dump,
        distance,
        compiler_log,
        built_asm,
        relocs,
        target_asm,
    )
def run():
    """Build the Gradio interface around ``predict`` and launch it.

    The server is started on ``0.0.0.0`` port ``7860`` with ``show_error``
    enabled.
    """
    # Inputs, in the order of predict()'s parameters.
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    # Outputs, in the order of the tuple predict() returns.
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Start the app. The call is unguarded, so it also runs whenever this module
# is imported.
run()