Spaces:
Runtime error
Runtime error
File size: 4,841 Bytes
2171b06 72c2877 a9e00bb 2171b06 72c2877 2171b06 72c2877 2171b06 a9e00bb 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 2171b06 a9e00bb 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 2171b06 72c2877 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import json
import requests
import streamlit as st
# Page-wide configuration is issued before any other Streamlit call.
st.set_page_config(layout="wide")

# Sidebar: table of contents rendered from a markdown file shipped with the app.
# The encoding is pinned so non-ASCII characters decode the same way on every
# platform instead of depending on the locale default.
with open("utils/table_contents.md", "r", encoding="utf-8") as f:
    contents = f.read()
st.sidebar.markdown(contents)

st.title("The Stack Bot 💬")

# Short introduction displayed right below the title.
intro = """
The Stack Bot is a tool to help you get started with tools developed in [BigCode](https://huggingface.co/bigcode),
such as [The Stack](https://huggingface.co/bigcode/the-stack) dataset and [SantaCoder](https://huggingface.co/bigcode/santacoder) model.
"""
st.markdown(intro, unsafe_allow_html=True)
@st.cache()
def load_languages():
    """Load the per-language metadata from ``utils/languages.json``.

    Returns:
        The parsed JSON document. The rest of this script indexes it by
        language name and reads the ``model``, ``scores``, ``gradio_demo``
        and ``num_examples`` fields of each entry.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open("utils/languages.json", "r", encoding="utf-8") as f:
        return json.load(f)
def how_to_load(language):
    """Render a snippet showing how to load one language subset of The Stack.

    Args:
        language: Name of the subset; it is interpolated into the
            ``data_dir`` argument of the displayed ``load_dataset`` call.

    Nothing is returned: the snippet is written straight to the page.
    """
    # The literal starts at column 0 on purpose so the rendered markdown
    # carries no leading indentation.
    st.markdown(
        f"""
```python
from datasets import load_dataset
dataset = load_dataset("bigcode/the-stack", data_dir="data/{language}", split="train")
# print first element
print(dataset[0])
```
"""
    )
def load_model(values, language):
    """Describe the model trained on a language subset, or invite a contribution.

    Args:
        values: Metadata entry of the selected language. ``values["model"]``
            is always read; ``values["scores"]`` is read only when a model
            is present.
        language: Language name, shown capitalized in the messages.

    Nothing is returned: all output is written to the page.
    """
    model = values["model"]

    if not model:
        # Adjacent literals are concatenated explicitly so no word is
        # duplicated or glued together at the line joins.
        text = (
            f"No model available for {language.capitalize()}. "
            "If you trained a model on this language, let us know "
            "in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) "
            "to feature your model!\n"
            "You can also train your own model on The Stack using the instructions below 🚀"
        )
        st.write(text)
        if st.button("Fine-tune your own model", key=4):
            st.write("Code available at [GitHub link] + add preview")
        return

    text = f"""{model} is a model that was trained on the {language.capitalize()} subset of The Stack. Here's how to use it:"""
    # The checkpoint name is wrapped in quotes so the displayed snippet is
    # valid Python that can be copy-pasted as is.
    code = f"""
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("{model}")
model = AutoModelForCausalLM.from_pretrained("{model}", trust_remote_code=True)
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```
"""
    st.write(text)
    st.markdown(code)
    st.write(f"The scores of this model are the following: {values['scores']}")
def generate_code(
    demo, gen_prompt, max_new_tokens=40, temperature=0.2, seed=0, timeout=60
):
    """Ask a hosted demo to complete a prompt through its HTTP endpoint.

    Args:
        demo: Base URL of the demo; ``/run/predict/`` is appended to it.
        gen_prompt: Prompt text sent to the demo.
        max_new_tokens: Forwarded to the demo as the second input.
        temperature: Forwarded to the demo as the third input.
        seed: Forwarded to the demo as the fourth input.
        timeout: Seconds to wait for the HTTP call before giving up, so an
            unresponsive demo cannot block the app indefinitely.

    Returns:
        The first element of the ``data`` field of the JSON response, or an
        empty string when the request fails or the response does not have
        the expected shape.
    """
    # call space using its API endpoint
    url = f"{demo}/run/predict/"
    payload = {"data": [gen_prompt, max_new_tokens, temperature, seed]}
    try:
        response = requests.post(url=url, json=payload, timeout=timeout)
        # Turn HTTP error statuses into exceptions instead of parsing them.
        response.raise_for_status()
        return response.json()["data"][0]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network/HTTP failure, non-JSON body, or unexpected payload shape:
        # callers treat an empty string as "generation failed".
        return ""
def init_nested_buttons():
    """Seed the toggle flags in session state and draw the models button.

    Both flags start as ``False`` when absent. Clicking the
    "Models trained on dataset" button flips its flag, so the value stored in
    session state persists across reruns.
    """
    state = st.session_state
    for flag in ("Models trained on dataset", "Generate code"):
        if flag not in state:
            state[flag] = False

    if st.button("Models trained on dataset"):
        currently_open = state["Models trained on dataset"]
        state["Models trained on dataset"] = not currently_open
languages = load_languages()

# NOTE(review): this copy of the file lost its indentation. The nesting below
# (only the selectbox inside col1; the demo section shown only while the model
# section is open) is a reconstruction -- confirm against the deployed app.
col1, col2 = st.columns([1, 1.5])
with col1:
    selected_language = st.selectbox(
        "Select one of 358 languages in The Stack", list(languages), key=1
    )

# Metadata entry of the selected language, looked up once and reused below.
language_info = languages[selected_language]

st.write(f"Here's how you can load the {selected_language.capitalize()} subset of The Stack:")
how_to_load(selected_language)  # writes to the page and returns nothing
if st.button("More info about the dataset", key=2):
    st.write(f"The dataset contains {language_info['num_examples']} examples.")
    # we can add some stats about files

init_nested_buttons()
if st.session_state["Models trained on dataset"]:
    load_model(language_info, selected_language)

    # The demo is offered only when the entry has both a model and a demo URL.
    if language_info["model"] and language_info["gradio_demo"]:
        demo_url = language_info["gradio_demo"]
        st.write(f"Here's a demo to try the model, for more flexibility you can use the [Gradio demo]({demo_url}).")
        gen_prompt = st.text_area(
            "Generate code with prompt:",
            value="# Implement a function to print hello world",
            height=100,
        ).strip()
        # A click flips the stored flag, so the result stays visible on reruns.
        if st.button("Generate code"):
            st.session_state["Generate code"] = not st.session_state["Generate code"]

        if st.session_state["Generate code"]:
            with st.spinner("Generating code..."):
                generated_text = generate_code(
                    demo=demo_url,
                    gen_prompt=gen_prompt,
                )
            # generate_code returns an empty string when the call failed.
            if not generated_text:
                st.markdown(f"Error: could not generate code. Make sure the Gradio demo at [{demo_url}]({demo_url}) works.")
            else:
                st.code(generated_text)