File size: 5,713 Bytes
94a508d 5f0a407 70ebd4a 66f8fc1 1defe4d 20b2b87 b4c7402 52b18ab 5f0a407 e98177c a2767e5 80ffc07 a2767e5 567f66d a2767e5 812a13e a2767e5 812a13e a2767e5 e98177c 20b2b87 b4c7402 e98177c b4c7402 e98177c b4c7402 5a649d5 d65ea20 5a649d5 d65ea20 029945b 5a649d5 e98177c dc6ea20 e98177c ff724df c4e7153 ff724df a2767e5 ff724df 6cdaa1a b639ecc 6cdaa1a 1defe4d f7b1854 1defe4d 687083b 989208b 47ebfff 1defe4d 5bf0310 d65ea20 b639ecc d65ea20 6cdaa1a ff724df 812a13e 94a508d 6cdaa1a 42886c0 d65ea20 ff724df 6cdaa1a ff724df d65ea20 ff724df 6cdaa1a ff724df d65ea20 66f8fc1 329f9c0 e98177c 41e9ae6 94a508d 1d30af4 94a508d 2901d44 20f12de 812a13e 41e9ae6 029945b a71a75a 5a649d5 ff724df a74a996 ff724df 029945b 329f9c0 66f8fc1 22568e3 720784d e98177c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import editdistance
import frontmatter
from hexdump2 import hexdump
import gradio as gr
import json
import shlex
import subprocess
import tempfile
from dist import levenshtein_with_wildcards, print_match_summary
# Parse README.md with python-frontmatter and keep only its `.content`
# attribute; run() passes this text to gr.Interface as the description.
_readme_post = frontmatter.load("README.md")
description = _readme_post.content
def trim(str, n):
    """Return *str* without its first *n* lines.

    The text is split with ``splitlines()`` and the surviving lines are
    rejoined with a single newline character, so the original line endings
    and any trailing newline are not preserved.

    Note: the parameter name shadows the ``str`` builtin; it is kept so
    existing callers are unaffected.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Return objdump output *str* with its first seven lines removed."""
    # Number of leading lines dropped from every objdump listing.
    leading_lines_to_drop = 7
    return trim(str, leading_lines_to_drop)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code with objdump and return the listing.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
        If objdump exits with a non-zero status, its stderr text is returned
        instead so the caller can see why nothing was disassembled.
    """
    import os

    # A temporary directory guarantees the scratch file is removed when we are
    # done, instead of leaving one file behind per call.
    with tempfile.TemporaryDirectory() as workdir:
        bin_path = os.path.join(workdir, "target.bin")
        # Close the file before objdump runs so all bytes are on disk.
        with open(bin_path, "wb") as bin_file:
            bin_file.write(byte_data)

        result = subprocess.run(
            ["objdump", "-D", "-b", "binary", "-m", architecture, "-M", options, bin_path],
            capture_output=True,
            text=True,
        )

    if result.returncode != 0:
        # Surface objdump's own diagnostic rather than an empty listing.
        return result.stderr
    return trim_objdump(result.stdout)
# Width in bits of the field patched by each supported x86-64 relocation type
# (per the System V x86-64 psABI relocation table).
_RELOC_SIZES_BITS = {
    "R_X86_64_64": 64,
    "R_X86_64_PC64": 64,
    "R_X86_64_32": 32,
    "R_X86_64_32S": 32,
    "R_X86_64_PC32": 32,
    "R_X86_64_PLT32": 32,
    "R_X86_64_GOTPCREL": 32,
    "R_X86_64_GOTPCRELX": 32,
    "R_X86_64_REX_GOTPCRELX": 32,
}


def _reloc_type2size(reloc_type):
    """Return the patched-field width in bits for relocation type *reloc_type*.

    Raises:
        AssertionError: If the relocation type is not in the table. Raised
            explicitly (not via ``assert``) so it still fires under ``-O``.
    """
    try:
        return _RELOC_SIZES_BITS[reloc_type]
    except KeyError:
        raise AssertionError(f"Unknown reloc {reloc_type}") from None


def _read_relocations(object_path):
    """Return llvm-readobj's relocation records for *object_path*.

    Records whose symbol name is ".text" are filtered out. The JSON is read
    as a list whose first element has a "Relocations" list of sections, each
    with a "Relocs" list of {"Relocation": ...} entries.
    """
    readobj_stdout = subprocess.run(
        ["llvm-readobj-19", "--elf-output-style=JSON", "--relocations", object_path],
        capture_output=True,
        text=True,
    ).stdout
    sections = json.loads(readobj_stdout)[0]["Relocations"]
    records = [entry["Relocation"] for section in sections for entry in section["Relocs"]]
    # Filter out .text
    return [record for record in records if record["Symbol"]["Name"] != ".text"]


def compile(compiler, flags, source):
    """Compile C source to an object file and extract its code and metadata.

    Note: this function's name shadows the ``compile`` builtin; it is kept
    because callers in this file use it.

    Args:
        compiler: Executable to invoke (for example ``"g++"``).
        flags: Extra command-line flags as one string; split with ``shlex``.
        source: The source code text to compile.

    Returns:
        A 4-tuple ``(relocations, compiled_bytes, compile_output, disassembly)``:
        the filtered llvm-readobj relocation records, the raw bytes of the
        ``.text``/``.text.*`` sections, the compiler's combined stdout+stderr,
        and the trimmed ``objdump -dr`` listing. When the compiler exits with
        a non-zero status the result is ``(None, None, compile_output, "")``.

    Raises:
        AssertionError: If the object contains a relocation type that is not
            in ``_RELOC_SIZES_BITS``.
    """
    import os

    # NOTE(review): `compiler` and `flags` are executed as given. predict()
    # forwards them straight from the web form, so this amounts to running
    # caller-chosen commands -- confirm the deployment is sandboxed.

    # All scratch files live in one directory that is removed on exit, so no
    # temporary files are left behind per request.
    with tempfile.TemporaryDirectory() as workdir:
        c_path = os.path.join(workdir, "input.c")
        object_path = os.path.join(workdir, "output.o")
        raw_path = os.path.join(workdir, "text.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", object_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        if result.returncode != 0:
            # No valid object file exists, so skip the inspection tools below
            # and report the failure with the compiler's diagnostics.
            return None, None, compile_output, ""

        # Extract the raw code bytes
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                object_path,
                raw_path,
            ]
        )
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # objcopy produced no output file; treat as "no code bytes".
            compiled_bytes = b""

        # Disassemble the object file
        objdump_stdout = subprocess.run(
            ["objdump", "-dr", object_path],
            capture_output=True,
            text=True,
        ).stdout
        disassembly = trim_objdump(objdump_stdout)

        json_relocs = _read_relocations(object_path)

    relocs = [(r["Offset"], _reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
    print(f"relocs: {relocs}")

    return json_relocs, compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile *source* and compare the resulting bytes with *target_bytes*.

    Args:
        target_bytes: Hex string of the target function's bytes.
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 7-tuple: hexdump of the compiled bytes (or "Compilation failed"),
        hexdump of the target bytes, edit distance between the two byte
        strings (or -1 when compilation failed), compiler output, compiled
        disassembly, compiled relocations, and target disassembly.
    """
    target_bytes = bytes.fromhex(target_bytes)
    relocations, built_bytes, compiler_log, built_listing = compile(compiler, flags, source)
    target_listing = disassemble_bytes(target_bytes, disasm_arch, disasm_options)

    # These fields are reported the same way whether or not compilation worked.
    target_dump = hexdump(target_bytes, result="return")
    shared_tail = (compiler_log, built_listing, relocations, target_listing)

    if built_bytes is None:
        return ("Compilation failed", target_dump, -1) + shared_tail

    built_dump = hexdump(built_bytes, result="return")
    distance = editdistance.eval(built_bytes, target_bytes)
    return (built_dump, target_dump, distance) + shared_tail
def run():
    """Build the Gradio interface around ``predict`` and serve it.

    The server listens on 0.0.0.0:7860 with error display enabled.
    """
    # One input widget per predict() parameter, in parameter order.
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    # One output widget per element of the tuple predict() returns.
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    interface = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Build and launch the UI whenever this module is executed or imported
# (there is no ``__main__`` guard).
run()
|