Spaces:
Sleeping
Sleeping
File size: 4,550 Bytes
94a508d 70ebd4a 66f8fc1 20b2b87 b4c7402 e98177c e9721d6 247cd31 e98177c a2767e5 80ffc07 a2767e5 567f66d a2767e5 e98177c 20b2b87 b4c7402 e98177c b4c7402 e98177c b4c7402 5a649d5 d65ea20 5a649d5 d65ea20 029945b 5a649d5 e98177c dc6ea20 e98177c ff724df a2767e5 ff724df d65ea20 ff724df d65ea20 ff724df b4c7402 e98177c 66b746d 94a508d ff724df 66b746d d65ea20 ff724df d65ea20 ff724df d65ea20 66f8fc1 329f9c0 e98177c 41e9ae6 94a508d 0982897 94a508d 2901d44 20f12de d353e8e 41e9ae6 029945b a71a75a 5a649d5 ff724df 029945b 329f9c0 66f8fc1 22568e3 720784d e98177c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import editdistance
from hexdump2 import hexdump
import gradio as gr
import shlex
import subprocess
import tempfile
# Markdown text passed as the ``description`` argument of the Gradio
# interface built in run(). It introduces the demo and lists the
# decompilation features the evaluation does not handle yet.
description = """This is a space testing a method for evaluating the quality of decompilation.
Currently unhandled features:
* PIC stuff
* Global references
* Function calls
* Wildcards in target function?
* How to extract compilable decompilation from decompilers?
"""
def trim(str, n):
    """Drop the first ``n`` lines of ``str`` and return the remainder.

    The text is split with ``splitlines()`` and re-joined with ``"\\n"``, so
    the original line terminators and any trailing newline are not kept.
    ``n`` is used as a slice start, so it follows normal slice semantics.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Return objdump output ``str`` without its first seven lines.

    NOTE(review): the seven skipped lines are presumably the banner objdump
    prints before the first instruction line -- confirm against the binutils
    version in use.
    """
    leading_lines_to_skip = 7
    return trim(str, leading_lines_to_skip)
def disassemble_bytes(byte_data, architecture):
    """Disassemble raw machine code with ``objdump``.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value handed to objdump's ``-m`` option.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
        objdump's exit status is not checked, so a failed run returns
        whatever (possibly empty) text it wrote to stdout.
    """
    # The file is removed automatically when the ``with`` block exits, so
    # repeated requests do not accumulate ``.bin`` files in the temp dir.
    # NOTE: re-opening a still-open NamedTemporaryFile by name works on POSIX
    # but not on Windows.
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # objdump opens the file by name, so the buffered bytes must reach
        # the file system before it runs.
        temp_bin_file.flush()
        completed = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        )
    return trim_objdump(completed.stdout)
def compile(compiler, flags, source):
    """Compile source code to an object file and extract its machine code.

    Args:
        compiler: Executable to invoke as the compiler.
        flags: Extra compiler flags as a single string; split into
            arguments with ``shlex.split``.
        source: Source text; it is written to a ``.c`` file before compiling.

    Returns:
        A ``(compiled_bytes, compile_output, disassembly)`` tuple.
        ``compiled_bytes`` is what objcopy extracted from the ``.text`` and
        ``.text.*`` sections, or ``None`` when the compiler exits with a
        non-zero status. ``compile_output`` is the compiler's stdout followed
        by its stderr. ``disassembly`` is ``objdump -d`` output with its
        leading lines removed by ``trim_objdump``. The exit statuses of
        objcopy and objdump are not checked.
    """
    import os  # local import: only this function needs path joining

    # NOTE(review): ``compiler`` and ``flags`` are executed exactly as given.
    # If they can come from untrusted users this runs an arbitrary program;
    # consider restricting ``compiler`` to an allow-list.

    # Everything lives in one scratch directory that is removed on exit, even
    # if one of the tools deletes or replaces its own output file. This avoids
    # leaving ``.c``/``.o`` files behind on every call.
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, "source.c")
        o_path = os.path.join(work_dir, "source.o")
        raw_path = os.path.join(work_dir, "source.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", o_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        # Extract the raw bytes of the code sections
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                o_path,
                raw_path,
            ]
        )
        # Read by path rather than through a pre-opened handle: objcopy may
        # not have produced the file at all (e.g. the compile step failed).
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            compiled_bytes = b""

        # Disassemble the object file
        disassembly = subprocess.run(
            ["objdump", "-d", o_path],
            capture_output=True,
            text=True,
        ).stdout

    disassembly = trim_objdump(disassembly)

    if result.returncode != 0:
        return None, compile_output, disassembly
    return compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, architecture):
    """Compile ``source`` and compare the result with the target bytes.

    Args:
        target_bytes: Target function as a hex string (parsed with
            ``bytes.fromhex``, which raises ``ValueError`` on invalid input).
        source: Source code to compile.
        compiler: Compiler executable, forwarded to ``compile``.
        flags: Compiler flags string, forwarded to ``compile``.
        architecture: Architecture name forwarded to ``disassemble_bytes``.

    Returns:
        A six-element tuple: hexdump of the compiled bytes (or the text
        ``"Compilation failed"``), hexdump of the target bytes, the edit
        distance between the two byte strings (``-1`` when compilation
        failed), the compiler output, the compiled disassembly and the
        target disassembly.
    """
    target = bytes.fromhex(target_bytes)
    compiled, compiler_log, compiled_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(target, architecture)

    # Only the first and third fields depend on whether compilation worked.
    if compiled is None:
        compiled_dump = "Compilation failed"
        distance = -1
    else:
        compiled_dump = hexdump(compiled, result="return")
        distance = editdistance.eval(compiled, target)

    return (
        compiled_dump,
        hexdump(target, result="return"),
        distance,
        compiler_log,
        compiled_asm,
        target_asm,
    )
def run():
    """Build the Gradio interface around ``predict`` and launch it.

    The server is started on ``0.0.0.0`` port ``7860`` with ``show_error``
    enabled.
    """
    # One input component per ``predict`` parameter, in the same order.
    input_components = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int foo() { return 0; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (for disassembler)", value="i386"),
    ]

    # One output component per element of the tuple ``predict`` returns.
    output_components = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.Textbox(label="Target Disassembly"),
    ]

    interface = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_components,
        outputs=output_components,
    )
    interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Start the app as soon as this file is executed. There is no
# ``if __name__ == "__main__"`` guard, so importing this module launches the
# server as well.
run()
|