File size: 5,713 Bytes
94a508d
5f0a407
70ebd4a
66f8fc1
1defe4d
20b2b87
b4c7402
 
 
52b18ab
 
5f0a407
e98177c
a2767e5
80ffc07
a2767e5
 
567f66d
a2767e5
812a13e
a2767e5
 
 
 
 
812a13e
a2767e5
 
 
 
 
 
e98177c
20b2b87
b4c7402
 
 
 
e98177c
b4c7402
 
 
e98177c
b4c7402
5a649d5
d65ea20
 
 
5a649d5
d65ea20
029945b
5a649d5
e98177c
 
 
 
 
 
 
 
dc6ea20
 
 
e98177c
 
 
 
 
 
 
 
ff724df
 
c4e7153
ff724df
 
 
a2767e5
ff724df
6cdaa1a
b639ecc
 
 
 
 
 
6cdaa1a
1defe4d
f7b1854
1defe4d
 
 
 
687083b
989208b
47ebfff
 
1defe4d
5bf0310
 
 
 
 
 
 
 
 
 
 
 
d65ea20
b639ecc
d65ea20
6cdaa1a
ff724df
812a13e
94a508d
6cdaa1a
42886c0
d65ea20
 
 
 
 
 
 
ff724df
6cdaa1a
ff724df
d65ea20
 
 
 
 
 
 
ff724df
6cdaa1a
ff724df
d65ea20
66f8fc1
 
329f9c0
 
 
e98177c
41e9ae6
94a508d
 
 
 
 
 
 
 
1d30af4
94a508d
2901d44
20f12de
812a13e
 
41e9ae6
029945b
 
 
a71a75a
5a649d5
ff724df
a74a996
ff724df
029945b
329f9c0
66f8fc1
22568e3
720784d
e98177c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import editdistance
import frontmatter
from hexdump2 import hexdump
import gradio as gr
import json
import shlex
import subprocess
import tempfile

from dist import levenshtein_with_wildcards, print_match_summary

# Text passed to the Gradio interface as its description: the ``content`` of
# README.md as parsed by ``frontmatter``. The path is relative, so it is
# resolved against the current working directory when this module is loaded.
description = frontmatter.load("README.md").content

def trim(str, n):
    """Return ``str`` without its first ``n`` lines.

    The text is split with ``splitlines()`` and the remaining lines are
    re-joined with ``"\\n"``, so the original line terminators (and any
    trailing newline) are not preserved.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)

def trim_objdump(str):
    """Return objdump output with its first seven lines removed.

    Callers in this file pass the stdout of ``objdump``; the dropped lines
    are presumably objdump's banner/header preceding the listing.
    """
    header_line_count = 7
    return trim(str, header_line_count)

def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine-code bytes with ``objdump``.

    Args:
        byte_data: Raw bytes to disassemble.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's stdout with its leading lines stripped by
        ``trim_objdump``. objdump's exit status is not checked, so the
        result may be empty when objdump fails.
    """
    # Run objdump while the temporary file is still open so that the file is
    # removed automatically when the ``with`` block exits (previously it was
    # created with delete=False and never cleaned up).
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # objdump opens the file by name, so buffered bytes must reach disk.
        temp_bin_file.flush()

        disassembly = subprocess.run(
            [
                "objdump", "-D", "-b", "binary",
                "-m", architecture,
                "-M", options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(disassembly)

def compile(compiler, flags, source):
    # Create a temporary file for the C source code
    with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
        temp_c_file.write(source.encode())
        temp_c_file_name = temp_c_file.name

    # Create a temporary file for the object file
    with tempfile.NamedTemporaryFile(suffix=".o", delete=False) as temp_o_file:
        temp_o_file_name = temp_o_file.name

    # Compile the C file to an object file
    result = subprocess.run(
        [compiler, "-c", temp_c_file_name]
        + shlex.split(flags)
        + ["-o", temp_o_file_name],
        capture_output=True,
        text=True,
    )
    compile_output = result.stdout + result.stderr

    # Create a temporary file for the raw bytes
    with tempfile.NamedTemporaryFile(suffix=".raw", delete=True) as raw_bytes_file:
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                temp_o_file_name,
                raw_bytes_file.name,
            ]
        )
        compiled_bytes = raw_bytes_file.read()

    # Disassemble the object file
    disassembly = subprocess.run(
        ["objdump", "-dr", temp_o_file_name],
        capture_output=True,
        text=True
    ).stdout
    disassembly = trim_objdump(disassembly)

    # Relocs
    # relocs = subprocess.run(
    #     ["objdump", "-r", temp_o_file_name],
    #     capture_output=True,
    #     text=True
    # ).stdout
    # relocs = trim(relocs, 3)

    json_relocs = subprocess.run(
        ["llvm-readobj-19", "--elf-output-style=JSON", "--relocations", temp_o_file_name],
        capture_output=True,
        text=True,
    ).stdout
    json_relocs = json.loads(json_relocs)
    json_relocs = json_relocs[0]["Relocations"]
    json_relocs = [r["Relocation"] for d in json_relocs for r in d['Relocs']]
    # Filter out .text
    json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]

    def reloc_type2size(s):
        match s:
            case "R_X86_64_PC32":
                return 32
            case "R_X86_64_PLT32":
                return 32
            case _:
                assert False, f"Unknown reloc {s}"

    relocs = [(r["Offset"], reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
    print(f"relocs: {relocs}")

    if result.returncode == 0:
        return json_relocs, compiled_bytes, compile_output, disassembly
    else:
        return None, None, compile_output, disassembly

def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile ``source`` and compare the result with the target bytes.

    Args:
        target_bytes: Hex string of the target function's bytes.
        source: Source code to compile.
        compiler: Compiler executable forwarded to ``compile``.
        flags: Compiler flags string forwarded to ``compile``.
        disasm_arch: Architecture forwarded to ``disassemble_bytes``.
        disasm_options: Disassembler options forwarded to ``disassemble_bytes``.

    Returns:
        A 7-tuple: hexdump of the compiled bytes (or ``"Compilation failed"``),
        hexdump of the target bytes, edit distance between compiled and target
        bytes (or ``-1`` on failure), compiler output, compiled disassembly,
        compiled relocations, and target disassembly.
    """
    raw_target = bytes.fromhex(target_bytes)
    relocs, built, build_log, built_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(raw_target, disasm_arch, disasm_options)

    # ``compile`` signals failure by returning None for the compiled bytes.
    succeeded = built is not None

    compiled_view = hexdump(built, result="return") if succeeded else "Compilation failed"
    target_view = hexdump(raw_target, result="return")
    distance = editdistance.eval(built, raw_target) if succeeded else -1

    return (
        compiled_view,
        target_view,
        distance,
        build_log,
        built_asm,
        relocs,
        target_asm,
    )


def run():
    """Build the Gradio interface around ``predict`` and launch it.

    The server is launched on ``0.0.0.0`` port ``7860`` with
    ``show_error=True``.
    """
    # Input widgets, in the order of ``predict``'s parameters.
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    # Output widgets, in the order of the tuple returned by ``predict``.
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    app = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)


# Start the app unconditionally: there is no ``__main__`` guard, so importing
# this module also builds the interface and launches the server.
run()