File size: 7,978 Bytes
e781603
 
 
 
 
 
 
 
 
 
8770774
e781603
8770774
e781603
8770774
 
e781603
 
8770774
e781603
8770774
e781603
 
8770774
e781603
 
 
8770774
e781603
8770774
e781603
 
8770774
e781603
8770774
e781603
8770774
e781603
8770774
e781603
8770774
e781603
8770774
e781603
 
8770774
e781603
 
 
8770774
e781603
 
 
8770774
 
 
 
e781603
8770774
e781603
 
 
 
8770774
e781603
8770774
e781603
8770774
e781603
8770774
e781603
 
 
8770774
 
 
e781603
 
8770774
e781603
 
8770774
 
 
 
 
 
 
 
e781603
 
 
 
 
8770774
e781603
 
8770774
e781603
8770774
e781603
8770774
 
 
e781603
 
8770774
e781603
 
 
8770774
e781603
8770774
e781603
8770774
 
 
e781603
 
8770774
e781603
8770774
e781603
8770774
 
 
 
 
e781603
 
 
 
8770774
e781603
8770774
e781603
8770774
 
 
e781603
 
 
 
8770774
e781603
8770774
e781603
 
 
8770774
e781603
 
 
 
 
 
8770774
e781603
 
 
 
 
 
 
 
8770774
e781603
 
 
 
8770774
e781603
8770774
e781603
8770774
 
 
e781603
 
8770774
e781603
8770774
 
e781603
 
8770774
 
 
 
e781603
8770774
e781603
 
8770774
e781603
 
8770774
e781603
8770774
e781603
8770774
e781603
 
8770774
e781603
 
8770774
e781603
 
 
 
 
8770774
 
e781603
 
8770774
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os
import json
import glob
import traceback
from flask import Flask, render_template, jsonify, send_file, request
from flask_cors import CORS

# Module-level Flask application; the route decorators in this file register
# their handlers on this object.
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes


# Serve the HTML viewer
@app.route("/")
def index():
    """Render the ``viewer.html`` template as the landing page."""
    viewer_page = render_template("viewer.html")
    return viewer_page


# Get list of available evaluations
@app.route("/api/evals")
def list_evals():
    """List the evaluation directories found under the results root.

    Query parameters:
        path: Directory to scan; defaults to ``./eval_results``.

    Returns:
        A JSON array with the names of the immediate sub-directories whose
        name starts with ``eval_``, sorted alphabetically, or a JSON error
        object with status 404 when ``path`` is not an existing directory.
    """
    base_dir = request.args.get("path", "./eval_results")
    # isdir (rather than exists) so a path that points at a regular file is
    # reported as 404 instead of letting os.listdir raise.
    if not os.path.isdir(base_dir):
        return jsonify({"error": f"Path {base_dir} does not exist"}), 404

    # os.listdir yields entries in arbitrary order; sort for a stable listing.
    eval_dirs = sorted(
        item
        for item in os.listdir(base_dir)
        if item.startswith("eval_") and os.path.isdir(os.path.join(base_dir, item))
    )

    return jsonify(eval_dirs)


# Get examples for an evaluation
def _read_first_run_task(example_dir, example_id):
    """Return the task text for one example, taken from its first run.

    The first ``run_*`` sub-directory (in sorted name order) is inspected.
    ``task.txt`` is preferred; otherwise ``summary[0].task`` from
    ``metadata.json`` is used.

    Args:
        example_dir: Path of the ``example_<id>`` directory.
        example_id: Identifier used to build the fallback label.

    Returns:
        The task text, the placeholder ``"Task for <example_id>"`` when no
        task can be extracted from the first run, or ``None`` when the
        example has no run directories at all.
    """
    fallback = f"Task for {example_id}"

    # Sort so that "the first run" is deterministic; os.listdir order is not.
    run_dirs = sorted(
        entry
        for entry in os.listdir(example_dir)
        if entry.startswith("run_")
        and os.path.isdir(os.path.join(example_dir, entry))
    )
    if not run_dirs:
        return None
    first_run = os.path.join(example_dir, run_dirs[0])

    task_path = os.path.join(first_run, "task.txt")
    if os.path.exists(task_path):
        with open(task_path, "r") as f:
            return f.read().strip()

    metadata_path = os.path.join(first_run, "metadata.json")
    if not os.path.exists(metadata_path):
        return fallback

    try:
        with open(metadata_path, "r") as f:
            metadata = json.load(f)
        return metadata["summary"][0]["task"]
    except (OSError, ValueError, KeyError, IndexError, TypeError):
        # Unreadable file, invalid JSON (JSONDecodeError is a ValueError), or
        # an unexpected structure: use the placeholder rather than failing
        # the whole listing or silently dropping the example.
        return fallback


@app.route("/api/eval/<eval_id>/examples")
def get_examples(eval_id):
    """Return a mapping of example id to task description for an evaluation.

    The mapping is read from ``examples.json`` inside the evaluation
    directory when that file exists and yields a non-empty value. Otherwise
    the ``example_*`` sub-directories are scanned and each task is read from
    the example's first run.

    Query parameters:
        path: Results root directory; defaults to ``./eval_results``.

    Returns:
        A JSON object ``{example_id: task_text}``, or a JSON error object
        with status 404 when the evaluation directory does not exist.
    """
    base_dir = request.args.get("path", "./eval_results")
    eval_path = os.path.join(base_dir, eval_id)

    # Report a missing evaluation the same way get_runs reports a missing
    # example, instead of letting os.listdir raise below.
    if not os.path.isdir(eval_path):
        return (
            jsonify({"error": f"Evaluation directory not found: {eval_path}"}),
            404,
        )

    # Try to read examples.json
    examples_json_path = os.path.join(eval_path, "examples.json")
    examples = {}

    if os.path.exists(examples_json_path):
        try:
            with open(examples_json_path, "r") as f:
                examples = json.load(f)
        except json.JSONDecodeError:
            app.logger.error(f"Error parsing examples.json at {examples_json_path}")

    # If examples.json doesn't exist or is empty, scan for example directories
    if not examples:
        # examples.json may have held a falsy non-dict value ([] or null);
        # start from a real dict so the assignments below cannot fail.
        examples = {}
        prefix = "example_"
        for item in sorted(os.listdir(eval_path)):
            example_dir = os.path.join(eval_path, item)
            if not (item.startswith(prefix) and os.path.isdir(example_dir)):
                continue

            # Strip only the leading prefix so the id round-trips with the
            # f"example_{example_id}" paths built by the other routes.
            example_id = item[len(prefix):]
            task = _read_first_run_task(example_dir, example_id)
            if task is not None:
                examples[example_id] = task

    return jsonify(examples)


# Get runs for an example
@app.route("/api/eval/<eval_id>/example/<example_id>/runs")
def get_runs(eval_id, example_id):
    """List the runs of one example together with their recorded status.

    Query parameters:
        path: Results root directory; defaults to ``./eval_results``.

    Returns:
        A JSON array of ``{"id": <run dir name>, "status": <status>}``
        objects, one per ``run_*`` sub-directory. The status is the
        ``status`` field of the run's ``metadata.json`` and is ``"unknown"``
        when the file is missing, unreadable, or lacks that field. A JSON
        error object with status 404 is returned when the example directory
        does not exist.
    """
    results_root = request.args.get("path", "./eval_results")
    example_dir = os.path.join(results_root, eval_id, f"example_{example_id}")

    if not os.path.exists(example_dir):
        return jsonify({"error": f"Example directory not found: {example_dir}"}), 404

    runs = []
    for entry in os.listdir(example_dir):
        run_path = os.path.join(example_dir, entry)
        # Only directories named run_* count as runs.
        if not (entry.startswith("run_") and os.path.isdir(run_path)):
            continue

        status = "unknown"
        metadata_path = os.path.join(run_path, "metadata.json")
        if os.path.exists(metadata_path):
            try:
                with open(metadata_path, "r") as fh:
                    status = json.load(fh).get("status", "unknown")
            except Exception as exc:
                # Best effort: a broken metadata file leaves the status unknown.
                app.logger.error(
                    f"Error reading metadata.json for {entry}: {str(exc)}"
                )

        runs.append({"id": entry, "status": status})

    app.logger.info(f"runs: {runs}")
    return jsonify(runs)


# Get metadata for a run
@app.route("/api/eval/<eval_id>/example/<example_id>/run/<run_id>/metadata")
def get_metadata(eval_id, example_id, run_id):
    """Return the parsed ``metadata.json`` of a single run.

    Query parameters:
        path: Results root directory; defaults to ``./eval_results``.

    Returns:
        The parsed metadata as JSON on success. Otherwise a JSON error
        object with status 404 (file missing or blank), 400 (invalid JSON)
        or 500 (any other failure, including the formatted traceback).
    """
    results_root = request.args.get("path", "./eval_results")
    run_dir = os.path.join(results_root, eval_id, f"example_{example_id}", run_id)
    metadata_path = os.path.join(run_dir, "metadata.json")
    app.logger.info(f"metadata: {metadata_path}")

    if not os.path.exists(metadata_path):
        return jsonify({"error": "Metadata not found", "path": metadata_path}), 404

    try:
        with open(metadata_path, "r") as fh:
            raw_text = fh.read()

        # A blank file is reported as "not found" rather than as invalid JSON.
        if not raw_text.strip():
            return jsonify({"error": "Metadata file is empty"}), 404

        return jsonify(json.loads(raw_text))
    except json.JSONDecodeError as exc:
        failure = {
            "error": "Invalid JSON in metadata file",
            "details": str(exc),
            "path": metadata_path,
        }
        app.logger.error(f"JSON error in {metadata_path}: {str(exc)}")
        return jsonify(failure), 400
    except Exception as exc:
        failure = {
            "error": "Error reading metadata file",
            "details": str(exc),
            "traceback": traceback.format_exc(),
            "path": metadata_path,
        }
        app.logger.error(f"Error reading {metadata_path}: {str(exc)}")
        return jsonify(failure), 500


# Get screenshots for a run
@app.route("/api/eval/<eval_id>/example/<example_id>/run/<run_id>/screenshots")
def get_screenshots(eval_id, example_id, run_id):
    """List the screenshot images stored directly inside a run directory.

    Files ending in ``.png``, ``.jpg`` or ``.jpeg`` are collected.

    Query parameters:
        path: Results root directory; defaults to ``./eval_results``.

    Returns:
        A JSON array of ``{"name": <file name>, "path": <image URL>}``
        objects sorted by file name, where the URL points at the
        ``/api/image`` route with the file path percent-encoded. A JSON
        error object with status 404 is returned when the run directory
        does not exist.
    """
    from urllib.parse import quote

    base_dir = request.args.get("path", "./eval_results")
    run_dir = os.path.join(base_dir, eval_id, f"example_{example_id}", run_id)

    if not os.path.exists(run_dir):
        return jsonify({"error": f"Run directory not found: {run_dir}"}), 404

    # Escape glob metacharacters ([, ], *, ?) that may occur in the directory
    # path so that only the "*.<ext>" part acts as a wildcard.
    escaped_run_dir = glob.escape(run_dir)

    screenshots = []
    for ext in ("png", "jpg", "jpeg"):
        pattern = os.path.join(escaped_run_dir, f"*.{ext}")
        for file_path in glob.glob(pattern):
            # Percent-encode the path: characters such as '&', '#', '+' or
            # spaces would otherwise corrupt the query string of the URL.
            encoded_path = quote(file_path)
            screenshots.append(
                {
                    "name": os.path.basename(file_path),
                    "path": f"/api/image?path={encoded_path}",
                }
            )

    # Sort by filename
    screenshots.sort(key=lambda shot: shot["name"])

    app.logger.info(f"screenshots: {screenshots}")

    return jsonify(screenshots)


# Serve an image file
@app.route("/api/image")
def get_image():
    """Serve one image file identified by the ``path`` query parameter.

    Query parameters:
        path: File-system path of the image to send (required).

    Returns:
        The file contents on success. Otherwise a JSON error object with
        status 400 (no path given, or the path does not end in ``.png``,
        ``.jpg`` or ``.jpeg``), 404 (not an existing regular file) or 500
        (sending the file failed).
    """
    path = request.args.get("path")
    if not path:
        return jsonify({"error": "No path provided"}), 400

    # SECURITY: the path comes straight from the request and CORS is enabled
    # for every route, so without a restriction any readable file could be
    # fetched. Only serve the image types that the screenshots route lists.
    allowed_extensions = (".png", ".jpg", ".jpeg")
    if not path.lower().endswith(allowed_extensions):
        return jsonify({"error": f"Unsupported image type: {path}"}), 400

    # isfile (rather than exists) so a directory is reported as 404 up front.
    if not os.path.isfile(path):
        return jsonify({"error": f"Image not found at path: {path}"}), 404

    try:
        # The check above resolves a relative path against the working
        # directory, whereas Flask may resolve it against the application
        # root; pass an absolute path so both refer to the same file.
        return send_file(os.path.abspath(path))
    except Exception as e:
        return jsonify({"error": f"Error serving image: {str(e)}"}), 500


if __name__ == "__main__":
    # Print a short startup banner, then start Flask's built-in server on
    # port 8000 with debug mode switched on.
    for banner_line in (
        "Evaluation Server is running at http://localhost:8000",
        "Press Ctrl+C to stop the server",
    ):
        print(banner_line)

    app.run(port=8000, debug=True)