Alina Lozovskaya committed
Commit d1ed69b · 1 Parent(s): 4111351

First commit

Files changed (5)
  1. .gitignore +45 -0
  2. Dockerfile +30 -0
  3. app.py +152 -0
  4. pyproject.toml +26 -0
  5. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,45 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ logs/
+ log_backups/
+ plots/
+ files/
+ .gradio/
+ fr.sh
+ repo_contents.txt
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ # Use Python 3.12.1 slim image as base
+ FROM python:3.12.1-slim
+
+ # Install dependencies required for UV and Python packages
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     curl ca-certificates git && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Install UV (fast Python dependency manager)
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+ # Ensure UV is available in PATH
+ ENV PATH="/root/.local/bin:$PATH"
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy pyproject and install dependencies using UV
+ COPY pyproject.toml .
+ RUN uv venv && uv sync
+
+ # Copy application code
+ COPY app.py .
+
+ # Expose Gradio app port
+ EXPOSE 7860
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+ # Entrypoint to run the Gradio app
+ ENTRYPOINT ["uv", "run", "python", "app.py"]
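
For a quick local smoke test of this image (a minimal sketch, not part of the commit: it assumes the image has been built from the Dockerfile above and started with the port published, e.g. `docker run -p 7860:7860 <image>`), one can poll the exposed Gradio port from the host:

```python
# Hypothetical smoke test, not part of this commit.
# Assumes a container built from the Dockerfile above is already running
# with port 7860 published on localhost.
import urllib.error
import urllib.request


def gradio_is_up(url: str = "http://localhost:7860", timeout: float = 5.0) -> bool:
    """Return True if the Gradio server answers with an HTTP 200."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.status == 200
    except (urllib.error.URLError, OSError):
        return False


if __name__ == "__main__":
    print("Gradio app reachable:", gradio_is_up())
```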
app.py ADDED
@@ -0,0 +1,152 @@
+ import os
+ import time
+ import pathlib
+ import threading
+ import shutil
+ import gradio as gr
+ import yaml
+ import io
+
+ from loguru import logger
+ from yourbench.pipeline import run_pipeline
+
+ UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
+ UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
+
+ CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
+
+ yourbench_log_stream = io.StringIO()
+
+ def custom_log_handler(message):
+     yourbench_log_stream.write(message + "\n")
+     # yourbench_log_stream.flush()
+
+ def get_log_content():
+     yourbench_log_stream.seek(0)
+     content = yourbench_log_stream.read()
+     print(len(content))
+     return content
+
+ logger.add(custom_log_handler, filter="yourbench")
+
+ def start_task():
+     # Start the long-running task in a separate thread
+     task_thread = threading.Thread(target=run_pipeline, args=(CONFIG_PATH,), daemon=True)
+     task_thread.start()
+     task_thread.join()
+
+ def generate_config(
+     hf_token,
+     hf_org,
+     model_name,
+     provider,
+     base_url,
+     api_key,
+     max_concurrent_requests,
+     ingestion_source,
+     ingestion_output,
+     run_ingestion,
+     summarization_source,
+     summarization_output,
+     run_summarization
+ ):
+
+     """Generates a config.yaml based on user inputs"""
+     config = {
+         "hf_configuration": {
+             "token": hf_token,
+             "private": True,
+             "hf_organization": hf_org
+         },
+         "model_list": [{
+             "model_name": model_name,
+             "provider": provider,
+             "base_url": base_url,
+             "api_key": api_key,
+             "max_concurrent_requests": max_concurrent_requests
+         }],
+         "pipeline": {
+             "ingestion": {
+                 "source_documents_dir": ingestion_source,
+                 "output_dir": ingestion_output,
+                 "run": run_ingestion
+             },
+             "summarization": {
+                 "source_dataset_name": summarization_source,
+                 "output_dataset_name": summarization_output,
+                 "run": run_summarization
+             }
+         }
+     }
+     return yaml.dump(config, default_flow_style=False)
+
+ def save_config(yaml_text):
+     with open(CONFIG_PATH, "w") as file:
+         file.write(yaml_text)
+     return "✅ Config saved as config.yaml!"
+
+
+ def save_files(files: list[str]):
+     saved_paths = []
+     for file in files:
+         file_path = pathlib.Path(file)
+         save_path = UPLOAD_DIRECTORY / file_path.name
+         shutil.move(str(file_path), str(save_path))
+         saved_paths.append(str(save_path))
+     return f"Files have been successfully saved to: {', '.join(saved_paths)}"
+
+ def start_yourbench():
+     run_pipeline(CONFIG_PATH, debug=False)
+
+ app = gr.Blocks()
+
+ with app:
+     gr.Markdown("## YourBench Configuration")
+
+     with gr.Tab("HF Configuration"):
+         hf_token = gr.Textbox(label="HF Token")
+         hf_org = gr.Textbox(label="HF Organization")
+
+     with gr.Tab("Model Settings"):
+         model_name = gr.Textbox(label="Model Name")
+         provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider")
+         base_url = gr.Textbox(label="Base URL")
+         api_key = gr.Textbox(label="API Key")
+         max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")
+
+     with gr.Tab("Pipeline Stages"):
+         ingestion_source = gr.Textbox(label="Ingestion Source Directory")
+         ingestion_output = gr.Textbox(label="Ingestion Output Directory")
+         run_ingestion = gr.Checkbox(label="Run Ingestion", value=False)
+         summarization_source = gr.Textbox(label="Summarization Source Dataset")
+         summarization_output = gr.Textbox(label="Summarization Output Dataset")
+         run_summarization = gr.Checkbox(label="Run Summarization", value=False)
+
+     with gr.Tab("Config"):
+         config_output = gr.Code(label="Generated Config", language="yaml")
+         preview_button = gr.Button("Generate Config")
+         save_button = gr.Button("Save Config")
+
+         preview_button.click(generate_config,
+                              inputs=[hf_token, hf_org, model_name, provider, base_url, api_key,
+                                      max_concurrent_requests, ingestion_source, ingestion_output,
+                                      run_ingestion, summarization_source, summarization_output, run_summarization],
+                              outputs=config_output)
+
+         save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])
+
+     with gr.Tab("Files"):
+         file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
+         file_explorer = gr.FileExplorer(root_dir=UPLOAD_DIRECTORY, interactive=False, label="Current Files")
+         output = gr.Textbox(label="Log")
+         file_input.upload(save_files, file_input, output)
+
+
+     with gr.Tab("Run Generation"):
+         log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
+         start_button = gr.Button("Start Long-Running Task")
+         timer = gr.Timer(0.5, active=True)
+         timer.tick(get_log_content, outputs=log_output)
+         start_button.click(start_task)
+
+ app.launch()
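
For reference, a standalone sketch (not part of app.py) of the YAML that generate_config produces, using placeholder values; only PyYAML is required, and the keys mirror the dictionary built above:

```python
# Illustrative only: mirrors the dictionary that generate_config builds in app.py,
# with placeholder values, so the resulting YAML layout is visible at a glance.
import yaml

config = {
    "hf_configuration": {
        "token": "hf_xxx",                # placeholder token
        "private": True,
        "hf_organization": "my-org",      # placeholder organization
    },
    "model_list": [{
        "model_name": "my-model",         # placeholder model name
        "provider": "huggingface",
        "base_url": "",
        "api_key": "",
        "max_concurrent_requests": 16,
    }],
    "pipeline": {
        "ingestion": {
            "source_documents_dir": "/app/uploaded_files",  # matches UPLOAD_DIRECTORY in app.py
            "output_dir": "/app/ingested",                  # placeholder output directory
            "run": True,
        },
        "summarization": {
            "source_dataset_name": "my-org/ingested",       # placeholder dataset names
            "output_dataset_name": "my-org/summaries",
            "run": True,
        },
    },
}

print(yaml.dump(config, default_flow_style=False))
```

Saving this output through the "Save Config" button writes it to /app/yourbench_config.yml, which is the path the Run Generation tab passes to run_pipeline.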
pyproject.toml ADDED
@@ -0,0 +1,26 @@
+ [project]
+ name = "yourbench-space"
+ version = "0.1.0"
+ requires-python = ">=3.12, <3.13"
+
+ dependencies = [
+     "yourbench @ git+https://github.com/huggingface/[email protected]",
+     "asyncio>=3.4.3",
+     "datasets>=3.3.0",
+     "gradio>=5.20.0",
+     "hf-transfer>=0.1.9",
+     "langfuse>=2.59.3",
+     "litellm>=1.61.16",
+     "loguru>=0.7.3",
+     "markitdown>=0.0.1a4",
+     "matplotlib>=3.10.0",
+     "openai>=1.63.0",
+     "python-dotenv>=1.0.1",
+     "torch>=2.6.0",
+     "tqdm>=4.67.1",
+     "transformers>=4.48.3",
+ ]
+
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
uv.lock ADDED
The diff for this file is too large to render. See raw diff