Sophia Koehler committed on
Commit
a8a9cd5
·
1 Parent(s): 2fa43bc
Files changed (1) hide show
  1. app.py +7 -23
app.py CHANGED
@@ -1,9 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
 
3
- from dataclasses import dataclass
4
  import pickle
5
  import os
6
- from typing import Iterable, Callable, List, Dict, Optional, Type, TypeVar
 
7
  from nlp4web_codebase.ir.data_loaders.dm import Document
8
  from collections import Counter
9
  import tqdm
@@ -11,6 +12,10 @@ import re
11
  import nltk
12
  nltk.download("stopwords", quiet=True)
13
  from nltk.corpus import stopwords as nltk_stopwords
 
 
 
 
14
 
15
  LANGUAGE = "english"
16
  word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
@@ -133,21 +138,8 @@ def run_counting(
133
  doc_texts=doc_texts,
134
  )
135
 
136
- from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
137
- sciq = load_sciq()
138
- counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
139
-
140
  """### BM25 Index"""
141
 
142
- from __future__ import annotations
143
- from dataclasses import asdict, dataclass
144
- import math
145
- import os
146
- from typing import Iterable, List, Optional, Type
147
- import tqdm
148
- from nlp4web_codebase.ir.data_loaders.dm import Document
149
-
150
-
151
  @dataclass
152
  class BM25Index(InvertedIndex):
153
 
@@ -237,11 +229,6 @@ class BM25Index(InvertedIndex):
237
 
238
  """### BM25 Retriever"""
239
 
240
- from nlp4web_codebase.ir.models import BaseRetriever
241
- from typing import Type
242
- from abc import abstractmethod
243
-
244
-
245
  class BaseInvertedIndexRetriever(BaseRetriever):
246
 
247
  @property
@@ -301,9 +288,6 @@ class BM25Retriever(BaseInvertedIndexRetriever):
301
  return BM25Index
302
 
303
 
304
- import gradio as gr
305
- from typing import TypedDict
306
-
307
  class Hit(TypedDict):
308
  cid: str
309
  score: float
 
1
  # -*- coding: utf-8 -*-
2
 
3
+ from dataclasses import asdict, dataclass
4
  import pickle
5
  import os
6
+ from __future__ import annotations
7
+ from typing import Iterable, Callable, List, Dict, Optional, Type, TypeVar, TypedDict
8
  from nlp4web_codebase.ir.data_loaders.dm import Document
9
  from collections import Counter
10
  import tqdm
 
12
  import nltk
13
  nltk.download("stopwords", quiet=True)
14
  from nltk.corpus import stopwords as nltk_stopwords
15
+ import math
16
+ from nlp4web_codebase.ir.models import BaseRetriever
17
+ from abc import abstractmethod
18
+ import gradio as gr
19
 
20
  LANGUAGE = "english"
21
  word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
 
138
  doc_texts=doc_texts,
139
  )
140
 
 
 
 
 
141
  """### BM25 Index"""
142
 
 
 
 
 
 
 
 
 
 
143
  @dataclass
144
  class BM25Index(InvertedIndex):
145
 
 
229
 
230
  """### BM25 Retriever"""
231
 
 
 
 
 
 
232
  class BaseInvertedIndexRetriever(BaseRetriever):
233
 
234
  @property
 
288
  return BM25Index
289
 
290
 
 
 
 
291
  class Hit(TypedDict):
292
  cid: str
293
  score: float