File size: 5,028 Bytes
94a508d
5f0a407
70ebd4a
66f8fc1
1defe4d
20b2b87
b4c7402
 
 
5f0a407
e98177c
a2767e5
80ffc07
a2767e5
 
567f66d
a2767e5
812a13e
a2767e5
 
 
 
 
812a13e
a2767e5
 
 
 
 
 
e98177c
20b2b87
b4c7402
 
 
 
e98177c
b4c7402
 
 
e98177c
b4c7402
5a649d5
d65ea20
 
 
5a649d5
d65ea20
029945b
5a649d5
e98177c
 
 
 
 
 
 
 
dc6ea20
 
 
e98177c
 
 
 
 
 
 
 
ff724df
 
e6c2c9f
ff724df
 
 
a2767e5
ff724df
6cdaa1a
 
 
 
 
 
930add3
6cdaa1a
1defe4d
 
 
 
 
 
 
d65ea20
6cdaa1a
d65ea20
6cdaa1a
ff724df
812a13e
94a508d
6cdaa1a
42886c0
d65ea20
 
 
 
 
 
 
ff724df
6cdaa1a
ff724df
d65ea20
 
 
 
 
 
 
ff724df
6cdaa1a
ff724df
d65ea20
66f8fc1
 
329f9c0
 
 
e98177c
41e9ae6
94a508d
 
 
 
 
 
 
 
0982897
94a508d
2901d44
20f12de
812a13e
 
41e9ae6
029945b
 
 
a71a75a
5a649d5
ff724df
6cdaa1a
ff724df
029945b
329f9c0
66f8fc1
22568e3
720784d
e98177c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import json
import os
import shlex
import subprocess
import tempfile

import editdistance
import frontmatter
import gradio as gr
from hexdump2 import hexdump

# Body of README.md as parsed by `frontmatter` (the `.content` attribute);
# passed to gr.Interface as the page description in run().
description = frontmatter.load("README.md").content

def trim(str, n):
    """Return ``str`` without its first ``n`` lines.

    The text is split with ``splitlines()`` and the surviving lines are
    re-joined with a single newline character, so the result carries no
    trailing newline and any other line endings become plain newlines.
    """
    kept_lines = str.splitlines()[n:]
    return "\n".join(kept_lines)

def trim_objdump(str):
    """Return objdump output with its first seven lines removed.

    Presumably those lines are objdump's file-format/section banner;
    the count is fixed and not derived from the text itself.
    """
    leading_lines_to_drop = 7
    return trim(str, leading_lines_to_drop)

def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code by running ``objdump`` on it.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value handed to ``objdump -m``.
        options: Value handed to ``objdump -M``.

    Returns:
        objdump's stdout with its leading lines stripped by
        ``trim_objdump``. If objdump exits with a non-zero status, its
        stderr text is returned instead so the failure is visible.
    """
    # The scratch directory and everything in it is deleted when the
    # ``with`` block exits, so repeated calls do not pile up temp files.
    with tempfile.TemporaryDirectory() as scratch_dir:
        bin_path = os.path.join(scratch_dir, "target.bin")
        with open(bin_path, "wb") as bin_file:
            bin_file.write(byte_data)

        completed = subprocess.run(
            ["objdump", "-D", "-b", "binary", "-m", architecture, "-M", options, bin_path],
            capture_output=True,
            text=True,
        )

    if completed.returncode != 0:
        # Surface objdump's own diagnostic rather than an empty string.
        return completed.stderr

    return trim_objdump(completed.stdout)

def compile(compiler, flags, source):
    """Compile ``source`` to an object file and inspect the result.

    The source is written to a ``.c`` file, compiled with
    ``<compiler> -c <file> <flags...> -o <object>``, and the object file
    is then processed with ``objcopy`` (raw bytes of the ``.text`` and
    ``.text.*`` sections) and ``objdump`` (disassembly and relocations).

    Args:
        compiler: Name or path of the compiler executable to run.
        flags: Extra compiler flags as a single string; split with
            ``shlex.split``.
        source: Source code text to compile.

    Returns:
        A 4-tuple ``(relocs, compiled_bytes, compile_output, disassembly)``:
        the trimmed ``objdump -r`` text, the raw section bytes, the
        compiler's combined stdout + stderr, and the trimmed
        ``objdump -d`` text. When the compiler exits non-zero, ``relocs``
        and ``compiled_bytes`` are ``None`` and ``disassembly`` is ``""``.
    """
    # NOTE(review): ``compiler`` and ``flags`` are executed as given. If
    # they can come from untrusted users this runs arbitrary programs --
    # confirm the deployment is sandboxed accordingly.

    # All intermediate files live in one scratch directory that is removed
    # on exit, whether or not the tools below succeed.
    with tempfile.TemporaryDirectory() as work_dir:
        source_path = os.path.join(work_dir, "source.c")
        object_path = os.path.join(work_dir, "source.o")
        raw_path = os.path.join(work_dir, "source.raw")

        with open(source_path, "wb") as source_file:
            source_file.write(source.encode())

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", source_path]
            + shlex.split(flags)
            + ["-o", object_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        if result.returncode != 0:
            # No usable object file: skip the binutils steps entirely.
            return None, None, compile_output, ""

        # Extract the raw code bytes
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                object_path,
                raw_path,
            ]
        )
        try:
            # Open by path after objcopy has finished so we always read
            # the file it actually wrote.
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # objcopy produced no output file; treat as no code bytes.
            compiled_bytes = b""

        # Disassemble the object file
        disassembly = subprocess.run(
            ["objdump", "-d", object_path],
            capture_output=True,
            text=True,
        ).stdout
        disassembly = trim_objdump(disassembly)

        # Relocs
        relocs = subprocess.run(
            ["objdump", "-r", object_path],
            capture_output=True,
            text=True,
        ).stdout
        relocs = trim(relocs, 3)

    return relocs, compiled_bytes, compile_output, disassembly

def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile ``source`` and compare the result with the target bytes.

    ``target_bytes`` is a hex string; it is decoded with ``bytes.fromhex``
    and disassembled with ``disassemble_bytes``. ``source`` is compiled
    with ``compile``.

    Returns a 7-tuple, in the order of the interface outputs: hexdump of
    the compiled bytes (or the text "Compilation failed"), hexdump of the
    target bytes, edit distance between the two byte strings (or -1 when
    compilation failed), compiler output, compiled disassembly, compiled
    relocations, and target disassembly.
    """
    target_bytes = bytes.fromhex(target_bytes)
    (
        compiled_relocs,
        compiled_bytes,
        compile_output,
        compiled_disassembly,
    ) = compile(compiler, flags, source)
    target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)

    if compiled_bytes is None:
        # compile() signals failure by returning None for the bytes.
        compiled_dump = "Compilation failed"
        target_dump = hexdump(target_bytes, result="return")
        distance = -1
    else:
        compiled_dump = hexdump(compiled_bytes, result="return")
        target_dump = hexdump(target_bytes, result="return")
        distance = editdistance.eval(compiled_bytes, target_bytes)

    return (
        compiled_dump,
        target_dump,
        distance,
        compile_output,
        compiled_disassembly,
        compiled_relocs,
        target_disassembly,
    )


def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The six input widgets map, in order, onto ``predict``'s parameters and
    the seven output widgets onto the tuple it returns. The server listens
    on 0.0.0.0:7860 with error display enabled.
    """
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int foo() { return 0; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.Textbox(label="Compiled relocations"),
        gr.Textbox(label="Target Disassembly"),
    ]

    app = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)


# Start the web UI whenever this module is executed; there is no
# ``__main__`` guard, so importing the module launches it as well.
run()