Sophia Koehler
committed on
Commit
·
a8a9cd5
1
Parent(s):
2fa43bc
fix3
Browse files
app.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
-
from dataclasses import dataclass
|
4 |
import pickle
|
5 |
import os
|
6 |
-
from
|
|
|
7 |
from nlp4web_codebase.ir.data_loaders.dm import Document
|
8 |
from collections import Counter
|
9 |
import tqdm
|
@@ -11,6 +12,10 @@ import re
|
|
11 |
import nltk
|
12 |
nltk.download("stopwords", quiet=True)
|
13 |
from nltk.corpus import stopwords as nltk_stopwords
|
|
|
|
|
|
|
|
|
14 |
|
15 |
LANGUAGE = "english"
|
16 |
word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
|
@@ -133,21 +138,8 @@ def run_counting(
|
|
133 |
doc_texts=doc_texts,
|
134 |
)
|
135 |
|
136 |
-
from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
|
137 |
-
sciq = load_sciq()
|
138 |
-
counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
|
139 |
-
|
140 |
"""### BM25 Index"""
|
141 |
|
142 |
-
from __future__ import annotations
|
143 |
-
from dataclasses import asdict, dataclass
|
144 |
-
import math
|
145 |
-
import os
|
146 |
-
from typing import Iterable, List, Optional, Type
|
147 |
-
import tqdm
|
148 |
-
from nlp4web_codebase.ir.data_loaders.dm import Document
|
149 |
-
|
150 |
-
|
151 |
@dataclass
|
152 |
class BM25Index(InvertedIndex):
|
153 |
|
@@ -237,11 +229,6 @@ class BM25Index(InvertedIndex):
|
|
237 |
|
238 |
"""### BM25 Retriever"""
|
239 |
|
240 |
-
from nlp4web_codebase.ir.models import BaseRetriever
|
241 |
-
from typing import Type
|
242 |
-
from abc import abstractmethod
|
243 |
-
|
244 |
-
|
245 |
class BaseInvertedIndexRetriever(BaseRetriever):
|
246 |
|
247 |
@property
|
@@ -301,9 +288,6 @@ class BM25Retriever(BaseInvertedIndexRetriever):
|
|
301 |
return BM25Index
|
302 |
|
303 |
|
304 |
-
import gradio as gr
|
305 |
-
from typing import TypedDict
|
306 |
-
|
307 |
class Hit(TypedDict):
|
308 |
cid: str
|
309 |
score: float
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
+
from dataclasses import asdict, dataclass
|
4 |
import pickle
|
5 |
import os
|
6 |
+
from __future__ import annotations
|
7 |
+
from typing import Iterable, Callable, List, Dict, Optional, Type, TypeVar, TypedDict
|
8 |
from nlp4web_codebase.ir.data_loaders.dm import Document
|
9 |
from collections import Counter
|
10 |
import tqdm
|
|
|
12 |
import nltk
|
13 |
nltk.download("stopwords", quiet=True)
|
14 |
from nltk.corpus import stopwords as nltk_stopwords
|
15 |
+
import math
|
16 |
+
from nlp4web_codebase.ir.models import BaseRetriever
|
17 |
+
from abc import abstractmethod
|
18 |
+
import gradio as gr
|
19 |
|
20 |
LANGUAGE = "english"
|
21 |
word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
|
|
|
138 |
doc_texts=doc_texts,
|
139 |
)
|
140 |
|
|
|
|
|
|
|
|
|
141 |
"""### BM25 Index"""
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
@dataclass
|
144 |
class BM25Index(InvertedIndex):
|
145 |
|
|
|
229 |
|
230 |
"""### BM25 Retriever"""
|
231 |
|
|
|
|
|
|
|
|
|
|
|
232 |
class BaseInvertedIndexRetriever(BaseRetriever):
|
233 |
|
234 |
@property
|
|
|
288 |
return BM25Index
|
289 |
|
290 |
|
|
|
|
|
|
|
291 |
class Hit(TypedDict):
|
292 |
cid: str
|
293 |
score: float
|