File size: 2,735 Bytes
f3c542d
 
 
76a84b9
6a39178
 
f3c542d
 
76a84b9
f3c542d
76a84b9
 
 
 
f3c542d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76a84b9
 
 
 
 
 
 
 
f3c542d
76a84b9
 
f3c542d
 
 
 
76a84b9
f3c542d
76a84b9
 
f3c542d
 
 
 
76a84b9
f3c542d
 
76a84b9
 
 
 
f3c542d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# User-facing text for the Online Mind2Web leaderboard page.
# All non-ASCII characters below are literal UTF-8 (emoji, box-drawing
# characters, em dash); they were previously stored as mis-decoded bytes.

# Page heading, as raw HTML.
TITLE = """<h1 align="center" id="space-title">🏆 Online Mind2Web Leaderboard</h1>"""

# Markdown introduction: benchmark summary, task statistics and difficulty levels.
# NOTE(review): the [[Blog]]/[[Paper]]/[[Code]]/[[Data]] link targets are empty
# here -- confirm the intended URLs.
INTRODUCTION_TEXT = """
Online Mind2Web is a benchmark designed to evaluate real-world performance of web agents on online websites.

[[Blog]]() [[Paper]]() [[Code]]() [[Data]]()

## Tasks
Online Mind2Web includes 300 tasks from 136 popular websites across various domains. It covers a diverse set of user tasks, to evaluate agents' performance in real-world environments.

Tasks are categorized into three difficulty levels based on the steps human annotators need:
- Easy: 1 - 5
- Medium: 6 - 10 
- Hard: 11 +

## Leaderboard
"""

# Markdown overview of what a submission must contain.
SUBMISSION_TEXT = """
## Submissions
Participants are invited to submit your agent's trajectory to test. The submissions will be evaluated based on our auto-eval.

### Format of submission
Submissions must include a sequence of images (i.e., screenshots in the trajectory) and a result.json file for each task. The JSON file should contain the fields: "Task", "Task_id", and "action_history". You can refer to an example of the submission files.
"""

# Label and body text for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
Online Mind2Web"""

# Detailed Markdown instructions for the trajectory directory layout and
# the result.json schema.
# NOTE(review): "[email protected]" looks like an obfuscated-address
# placeholder rather than a real address -- confirm the contact e-mail.
SUBMIT_INTRODUCTION = """
## ⚠ Please submit the trajectory file with the following format:
Each task is stored in a folder named after its `task_id`, containing:

- `trajectory/`: Stores screenshots of each step.
- `result.json`: Task metadata and action history.

**Structure:**
```
main_directory/
└── task_id/
    ├── result.json
    └── trajectory/
        ├── 0_screenshot.png
        ├── 1_screenshot.png
        └── ...
```

**`result.json` format:**
```json
{
    "task_id": 123,
    "task": "abc",
    "action_history": ["abc", "xyz", "..."]
}
```
Please send your agent's name, model family, and organization via email to [email protected], along with the trajectory directory attached.

We will run the auto-evaluation. If you have conducted your own human evaluation, please also attach your human eval results—we will spot-check these before adding them to the human-eval table.

"""
# Markdown heading for the benchmark statistics section.
DATA_DATASET = """## More Statistics for Online Mind2Web Benchmark
"""


def format_error(msg):
    """Return ``msg`` wrapped in a centered, red, 20px HTML ``<p>`` element.

    The message is interpolated as-is; no HTML escaping is applied.
    """
    template = "<p style='color: red; font-size: 20px; text-align: center;'>{}</p>"
    return template.format(msg)

def format_warning(msg):
    """Return ``msg`` wrapped in a centered, orange, 20px HTML ``<p>`` element.

    The message is interpolated as-is; no HTML escaping is applied.
    """
    opening_tag = "<p style='color: orange; font-size: 20px; text-align: center;'>"
    return "{}{}</p>".format(opening_tag, msg)

def format_log(msg):
    """Return ``msg`` wrapped in a centered, green, 20px HTML ``<p>`` element.

    The message is interpolated as-is; no HTML escaping is applied.
    """
    css = "color: green; font-size: 20px; text-align: center;"
    return "<p style='{style}'>{body}</p>".format(style=css, body=msg)

def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Both values are interpolated as-is; no escaping is applied.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return '<a target="_blank" href="{}" style="{}">{}</a>'.format(
        link, anchor_style, model_name
    )