Update app.py

app.py CHANGED
@@ -39,7 +39,7 @@ def page_0(query):
     query = tokenize_sentence(query)
     hits = searcher.search(query, k=NUM_PAGES*RESULTS_PER_PAGE)
     ix = [int(hit.docid) for hit in hits]
-    results = ds.select(ix).shard(num_shards=NUM_PAGES, index=0, contiguous=True)
+    results = ds.select(ix).shard(num_shards=NUM_PAGES, index=0, contiguous=True)
     results = format_results(results, untokenized_query)
     return results, [ix], gr.update(visible=True), untokenized_query

@@ -51,7 +51,8 @@ def page_i(i, ix, query):
 
 with gr.Blocks(css="#b {min-width:15px;background:transparent;}") as demo: #border:white;box-shadow:none;
     with gr.Row():
-        gr.Markdown(value="""## <p style="text-align: center;"> Code search </p>""")
+        gr.Markdown(value="""## <p style="text-align: center;"> Code search </p>""")
+        gr.Markdown("This search tool was used to validate the tokenization scheme for code retrieval in the BigCode project. We indexed the [Santacoder](https://huggingface.co/bigcode/santacoder) training dataset and used a (2,4)-gram tokenizer to build the index. This is the same tokenization scheme that was later used to index the [StarCoder](https://huggingface.co/spaces/bigcode/search) dataset.")
     with gr.Row():
         with gr.Column(scale=1):
             result_list = gr.Dataframe(type="array", visible=False, col_count=1)
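The hunks above reference two pieces the diff does not show: `tokenize_sentence`, which the new description says implements a (2,4)-gram scheme, and the body of `page_i`. A minimal sketch of both, assuming plain character n-grams joined into a whitespace-delimited query string for the BM25 searcher; the function bodies are illustrative, not taken from app.py, while `ds`, `NUM_PAGES`, and `format_results` are the app's own names:

def tokenize_sentence(sentence: str) -> str:
    # Assumed scheme: emit every character n-gram of length 2 to 4, joined by
    # spaces so the searcher scores each n-gram as a single BM25 term.
    grams = []
    for n in range(2, 5):
        grams.extend(sentence[i:i + n] for i in range(len(sentence) - n + 1))
    return " ".join(grams)

def page_i(i, ix, query):
    # Hypothetical counterpart to page_0: ix holds the ranked doc ids fetched
    # up front, so page i is just the i-th contiguous shard of the reordered
    # dataset.
    results = ds.select(ix).shard(num_shards=NUM_PAGES, index=i, contiguous=True)
    return format_results(results, query)

tokenize_sentence("query")  # -> "qu ue er ry que uer ery quer uery"

Sub-word n-grams like these are what let a query such as `quer` match both `query` and `querying`, which is the usual motivation for this kind of scheme in code retrieval.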