Spaces:
Sleeping
Sleeping
Removed comments + add specifications + add batch search
Browse files- app.py +98 -287
- static/script.js +96 -96
- templates/index.html +6 -6
app.py
CHANGED
@@ -41,135 +41,29 @@ app.add_middleware(
|
|
41 |
allow_headers=["*"],
|
42 |
)
|
43 |
|
44 |
-
# def get_text(specification: str, version: str):
|
45 |
-
# """Récupère les bytes du PDF à partir d'une spécification et d'une version."""
|
46 |
-
# doc_id = specification
|
47 |
-
# series = doc_id.split(".")[0]
|
48 |
-
|
49 |
-
# response = requests.get(
|
50 |
-
# f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{version}.zip",
|
51 |
-
# verify=False,
|
52 |
-
# headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
53 |
-
# )
|
54 |
-
|
55 |
-
# if response.status_code != 200:
|
56 |
-
# raise Exception(f"Téléchargement du ZIP échoué pour {specification}-{version}")
|
57 |
-
|
58 |
-
# zip_bytes = io.BytesIO(response.content)
|
59 |
-
|
60 |
-
# with zipfile.ZipFile(zip_bytes) as zf:
|
61 |
-
# for file_name in zf.namelist():
|
62 |
-
# if file_name.endswith("zip"):
|
63 |
-
# print("Another ZIP !")
|
64 |
-
# zip_bytes = io.BytesIO(zf.read(file_name))
|
65 |
-
# zf = zipfile.ZipFile(zip_bytes)
|
66 |
-
# for file_name2 in zf.namelist():
|
67 |
-
# if file_name2.endswith("doc") or file_name2.endswith("docx"):
|
68 |
-
# if "cover" in file_name2.lower():
|
69 |
-
# print("COVER !")
|
70 |
-
# continue
|
71 |
-
# ext = file_name2.split(".")[-1]
|
72 |
-
# doc_bytes = zf.read(file_name2)
|
73 |
-
# temp_id = str(uuid.uuid4())
|
74 |
-
# input_path = f"/tmp/{temp_id}.{ext}"
|
75 |
-
# output_path = f"/tmp/{temp_id}.txt"
|
76 |
-
|
77 |
-
# with open(input_path, "wb") as f:
|
78 |
-
# f.write(doc_bytes)
|
79 |
-
|
80 |
-
# subprocess.run([
|
81 |
-
# "libreoffice",
|
82 |
-
# "--headless",
|
83 |
-
# "--convert-to", "txt",
|
84 |
-
# "--outdir", "/tmp",
|
85 |
-
# input_path
|
86 |
-
# ], check=True)
|
87 |
-
|
88 |
-
# with open(output_path, "r") as f:
|
89 |
-
# txt_data = [line.strip() for line in f if line.strip()]
|
90 |
-
|
91 |
-
# os.remove(input_path)
|
92 |
-
# os.remove(output_path)
|
93 |
-
# return txt_data
|
94 |
-
# elif file_name.endswith("doc") or file_name.endswith("docx"):
|
95 |
-
# if "cover" in file_name.lower():
|
96 |
-
# print("COVER !")
|
97 |
-
# continue
|
98 |
-
# ext = file_name.split(".")[-1]
|
99 |
-
# doc_bytes = zf.read(file_name)
|
100 |
-
# temp_id = str(uuid.uuid4())
|
101 |
-
# input_path = f"/tmp/{temp_id}.{ext}"
|
102 |
-
# output_path = f"/tmp/{temp_id}.txt"
|
103 |
-
|
104 |
-
# print("Ecriture")
|
105 |
-
# with open(input_path, "wb") as f:
|
106 |
-
# f.write(doc_bytes)
|
107 |
-
|
108 |
-
# print("Convertissement")
|
109 |
-
# subprocess.run([
|
110 |
-
# "libreoffice",
|
111 |
-
# "--headless",
|
112 |
-
# "--convert-to", "txt",
|
113 |
-
# "--outdir", "/tmp",
|
114 |
-
# input_path
|
115 |
-
# ], check=True)
|
116 |
-
|
117 |
-
# print("Ecriture TXT")
|
118 |
-
# with open(output_path, "r", encoding="utf-8") as f:
|
119 |
-
# txt_data = [line.strip() for line in f if line.strip()]
|
120 |
-
|
121 |
-
# os.remove(input_path)
|
122 |
-
# os.remove(output_path)
|
123 |
-
# return txt_data
|
124 |
-
|
125 |
-
# raise Exception(f"Aucun fichier .doc/.docx trouvé dans le ZIP pour {specification}-{version}")
|
126 |
-
|
127 |
-
# def get_scope(specification: str, version: str):
|
128 |
-
# try:
|
129 |
-
# spec_text = get_text(specification, version)
|
130 |
-
# scp_i = 0
|
131 |
-
# nxt_i = 0
|
132 |
-
# for x in range(len(spec_text)):
|
133 |
-
# text = spec_text[x]
|
134 |
-
# if re.search(r"scope$", text, flags=re.IGNORECASE):
|
135 |
-
# scp_i = x
|
136 |
-
# nxt_i = scp_i + 10
|
137 |
-
# if re.search(r"references$", text, flags=re.IGNORECASE):
|
138 |
-
# nxt_i = x
|
139 |
-
|
140 |
-
# return re.sub(r"\s+", " ", " ".join(spec_text[scp_i+1:nxt_i])) if len(spec_text[scp_i+1:nxt_i]) < 2 else "Not found"
|
141 |
-
# except Exception as e:
|
142 |
-
# traceback.print_exception(e)
|
143 |
-
# return "Not found (error)"
|
144 |
-
|
145 |
class DocRequest(BaseModel):
|
146 |
doc_id: str
|
147 |
|
148 |
class DocResponse(BaseModel):
|
149 |
doc_id: str
|
150 |
url: str
|
|
|
151 |
scope: Optional[str] = None
|
152 |
search_time: float
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
# class BatchDocResponse(BaseModel):
|
159 |
-
# results: Dict[str, str]
|
160 |
-
# missing: List[str]
|
161 |
-
# search_time: float
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
# wg: Optional[str] = None
|
167 |
-
# spec_type: Optional[Literal["TS", "TR"]] = None
|
168 |
-
# mode: Optional[Literal["and", "or"]] = "and"
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
173 |
|
174 |
class DocFinder:
|
175 |
def __init__(self):
|
@@ -230,179 +124,96 @@ class DocFinder:
|
|
230 |
return self.indexer[doc]
|
231 |
|
232 |
main_tsg, workgroup, doc = self.get_workgroup(doc_id)
|
|
|
233 |
if main_tsg:
|
234 |
wg_url = self.find_workgroup_url(main_tsg, workgroup)
|
235 |
if wg_url:
|
236 |
files = self.get_docs_from_url(wg_url)
|
237 |
for f in files:
|
238 |
if doc in f.lower() or original in f:
|
|
|
239 |
doc_url = f"{wg_url}/{f}"
|
240 |
self.indexer[original] = doc_url
|
241 |
self.save_indexer()
|
242 |
-
|
243 |
-
return f"Document {doc_id} not found"
|
244 |
-
|
245 |
-
# class DocFinder:
|
246 |
-
# def __init__(self):
|
247 |
-
# self.main_ftp_url = "https://www.3gpp.org/ftp"
|
248 |
-
# self.indexer_file = "indexed_docs.json"
|
249 |
-
# self.indexer, self.last_indexer_date = self.load_indexer()
|
250 |
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
# return {}, None
|
258 |
-
|
259 |
-
# def save_indexer(self):
|
260 |
-
# """Save the updated index"""
|
261 |
-
# self.last_indexer_date = today.strftime("%d/%m/%Y-%H:%M:%S")
|
262 |
-
# with open(self.indexer_file, "w", encoding="utf-8") as f:
|
263 |
-
# today = datetime.today()
|
264 |
-
# output = {"docs": self.indexer, "last_indexed_date": self.last_indexer_date}
|
265 |
-
# json.dump(output, f, indent=4, ensure_ascii=False)
|
266 |
-
|
267 |
-
# def get_workgroup(self, doc):
|
268 |
-
# main_tsg = "tsg_ct" if doc[0] == "C" else "tsg_sa" if doc[0] == "S" else None
|
269 |
-
# if main_tsg is None:
|
270 |
-
# return None, None, None
|
271 |
-
# workgroup = f"WG{int(doc[1])}" if doc[1].isnumeric() else main_tsg.upper()
|
272 |
-
# return main_tsg, workgroup, doc
|
273 |
-
|
274 |
-
# def find_workgroup_url(self, main_tsg, workgroup):
|
275 |
-
# """Find the URL for the specific workgroup"""
|
276 |
-
# response = requests.get(f"{self.main_ftp_url}/{main_tsg}", verify=False)
|
277 |
-
# soup = BeautifulSoup(response.text, 'html.parser')
|
278 |
|
279 |
-
# for item in soup.find_all("tr"):
|
280 |
-
# link = item.find("a")
|
281 |
-
# if link and workgroup in link.get_text():
|
282 |
-
# return f"{self.main_ftp_url}/{main_tsg}/{link.get_text()}"
|
283 |
-
|
284 |
-
# return f"{self.main_ftp_url}/{main_tsg}/{workgroup}"
|
285 |
-
|
286 |
-
# def get_docs_from_url(self, url):
|
287 |
-
# """Get list of documents/directories from a URL"""
|
288 |
-
# try:
|
289 |
-
# response = requests.get(url, verify=False, timeout=10)
|
290 |
-
# soup = BeautifulSoup(response.text, "html.parser")
|
291 |
-
# return [item.get_text() for item in soup.select("tr td a")]
|
292 |
-
# except Exception as e:
|
293 |
-
# print(f"Error accessing {url}: {e}")
|
294 |
-
# return []
|
295 |
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
#
|
317 |
-
|
318 |
-
# for file in files:
|
319 |
-
# if doc in file.lower() or original_id in file:
|
320 |
-
# doc_url = f"{docs_url}/{file}"
|
321 |
-
# self.indexer[original_id] = doc_url
|
322 |
-
# return doc_url
|
323 |
-
# # ZIP subfolder
|
324 |
-
# if "zip" in [x for x in files]:
|
325 |
-
# zip_url = f"{docs_url}/zip"
|
326 |
-
# zip_files = self.get_docs_from_url(zip_url)
|
327 |
-
# for file in zip_files:
|
328 |
-
# if doc in file.lower() or original_id in file:
|
329 |
-
# doc_url = f"{zip_url}/{file}"
|
330 |
-
# self.indexer[original_id] = doc_url
|
331 |
-
# self.save_indexer()
|
332 |
-
# return doc_url
|
333 |
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
# continue
|
340 |
-
# meeting_url = f"{workshop_url}/{meeting}"
|
341 |
-
# contents = self.get_docs_from_url(meeting_url)
|
342 |
-
# for sub in contents:
|
343 |
-
# if sub.lower() in ['docs', 'tdocs']:
|
344 |
-
# docs_url = f"{meeting_url}/{sub}"
|
345 |
-
# files = self.get_docs_from_url(docs_url)
|
346 |
-
# for file in files:
|
347 |
-
# if doc_id.lower() in file.lower() or original_id in file:
|
348 |
-
# doc_url = f"{docs_url}/{file}"
|
349 |
-
# self.indexer[original_id] = doc_url
|
350 |
-
# self.save_indexer()
|
351 |
-
# return doc_url
|
352 |
-
# if "zip" in [x.lower() for x in files]:
|
353 |
-
# zip_url = f"{docs_url}/zip"
|
354 |
-
# zip_files = self.get_docs_from_url(zip_url)
|
355 |
-
# for file in zip_files:
|
356 |
-
# if doc_id.lower() in file.lower() or original_id in file:
|
357 |
-
# doc_url = f"{zip_url}/{file}"
|
358 |
-
# self.indexer[original_id] = doc_url
|
359 |
-
# self.save_indexer()
|
360 |
-
# return doc_url
|
361 |
|
362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
|
364 |
@app.get("/")
|
365 |
async def main_menu():
|
366 |
return FileResponse(os.path.join("templates", "index.html"))
|
367 |
|
368 |
-
|
369 |
-
|
370 |
-
# start_time = time.time()
|
371 |
-
# kws = [_.lower() for _ in request.keywords.split(" ")]
|
372 |
-
# results = []
|
373 |
-
|
374 |
-
# for string, spec in finder_spec.indexer_specs.items():
|
375 |
-
# if request.mode == "and":
|
376 |
-
# if not all(kw in string.lower() for kw in kws):
|
377 |
-
# continue
|
378 |
-
# elif request.mode == "or":
|
379 |
-
# if not any(kw in string.lower() for kw in kws):
|
380 |
-
# continue
|
381 |
-
# release = request.release
|
382 |
-
# working_group = request.wg
|
383 |
-
# spec_type = request.spec_type
|
384 |
|
385 |
-
|
386 |
-
# continue
|
387 |
-
# if spec.get('working_group', None) is None or (working_group is not None and spec["working_group"] != working_group):
|
388 |
-
# continue
|
389 |
-
# if spec_type is not None and spec["type"] != spec_type:
|
390 |
-
# continue
|
391 |
-
|
392 |
-
# results.append(spec)
|
393 |
-
# if len(results) > 0:
|
394 |
-
# return KeywordResponse(
|
395 |
-
# results=results,
|
396 |
-
# search_time=time.time() - start_time
|
397 |
-
# )
|
398 |
-
# else:
|
399 |
-
# raise HTTPException(status_code=404, detail="Specifications not found")
|
400 |
-
|
401 |
-
finder = DocFinder()
|
402 |
-
|
403 |
-
@app.post("/find", response_model=DocResponse)
|
404 |
def find_document(request: DocRequest):
|
405 |
start_time = time.time()
|
|
|
406 |
result = finder.search_document(request.doc_id)
|
407 |
|
408 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
@@ -410,27 +221,27 @@ def find_document(request: DocRequest):
|
|
410 |
doc_id=request.doc_id,
|
411 |
url=result,
|
412 |
search_time=time.time() - start_time
|
413 |
-
)
|
414 |
else:
|
415 |
raise HTTPException(status_code=404, detail=result)
|
416 |
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
# results = {}
|
422 |
-
# missing = []
|
423 |
-
|
424 |
-
# for doc_id in request.doc_ids:
|
425 |
-
# finder = finder_tsg if doc_id[0].isalpha() else finder_spec
|
426 |
-
# result = finder.search_document(doc_id)
|
427 |
-
# if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
428 |
-
# results[doc_id] = result
|
429 |
-
# else:
|
430 |
-
# missing.append(doc_id)
|
431 |
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
allow_headers=["*"],
|
42 |
)
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Request body for a single lookup; the /find handler reads `doc_id` from it.
class DocRequest(BaseModel):
    # Identifier to search for; its first character decides which finder is used.
    doc_id: str
|
46 |
|
47 |
# Response body describing one located document.
class DocResponse(BaseModel):
    # Identifier exactly as it was sent by the client.
    doc_id: str
    # Resolved download URL for the document.
    url: str
    # Optional release label; not populated by the handlers visible here.
    release: Optional[str] = None
    # Optional scope text; not populated by the handlers visible here.
    scope: Optional[str] = None
    # Wall-clock duration of the lookup, in seconds.
    search_time: float
|
53 |
|
54 |
+
# Response body carrying several DocResponse entries for one identifier.
# NOTE(review): no handler in the visible code returns this model - confirm it is still needed.
class MultiDocResponse(BaseModel):
    # Identifier the results belong to.
    doc_id: str
    # One entry per matching document.
    results: List[DocResponse]
    # Wall-clock duration of the lookup, in seconds.
    search_time: float
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
# Request body for the /batch endpoint.
class BatchDocRequest(BaseModel):
    # Identifiers to look up, processed in the order given.
    doc_ids: List[str]
    # Optional release filter; the visible /batch handler does not read it.
    release: Optional[int] = None
|
|
|
|
|
|
|
62 |
|
63 |
+
# Response body for the /batch endpoint.
class BatchDocResponse(BaseModel):
    # Maps each identifier that was found to its resolved URL.
    results: Dict[str, str]
    # Identifiers for which no document could be located.
    missing: List[str]
    # Wall-clock duration of the whole batch, in seconds.
    search_time: float
|
67 |
|
68 |
class DocFinder:
|
69 |
def __init__(self):
|
|
|
124 |
return self.indexer[doc]
|
125 |
|
126 |
main_tsg, workgroup, doc = self.get_workgroup(doc_id)
|
127 |
+
urls = []
|
128 |
if main_tsg:
|
129 |
wg_url = self.find_workgroup_url(main_tsg, workgroup)
|
130 |
if wg_url:
|
131 |
files = self.get_docs_from_url(wg_url)
|
132 |
for f in files:
|
133 |
if doc in f.lower() or original in f:
|
134 |
+
print(f)
|
135 |
doc_url = f"{wg_url}/{f}"
|
136 |
self.indexer[original] = doc_url
|
137 |
self.save_indexer()
|
138 |
+
urls.append(doc_url)
|
139 |
+
return urls[0] if len(urls) == 1 else urls[-2] if len(urls) > 1 else f"Document {doc_id} not found"
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
+
class SpecFinder:
    """Locate ETSI Technical Specification PDFs on the ETSI delivery server.

    Known URLs are read from a JSON index file when it exists; anything not
    in the index is resolved by scraping the server's directory listings.
    """

    def __init__(self):
        self.main_url = "https://www.etsi.org/deliver/etsi_ts"
        self.indexer_file = "indexed_specifications.json"
        self.indexer, self.last_indexer_date = self.load_indexer()
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}

    def load_indexer(self):
        """Return ``(specs, last_indexed_date)`` from the index file.

        Returns ``({}, None)`` when the index file does not exist.
        """
        if os.path.exists(self.indexer_file):
            with open(self.indexer_file, "r", encoding="utf-8") as f:
                x = json.load(f)
                return x["specs"], x["last_indexed_date"]
        return {}, None

    def save_indexer(self):
        """Write the in-memory index to disk, stamped with the current time."""
        today = datetime.today()
        self.last_indexer_date = today.strftime("%d/%m/%Y-%H:%M:%S")
        with open(self.indexer_file, "w", encoding="utf-8") as f:
            output = {"specs": self.indexer, "last_indexed_date": self.last_indexer_date}
            json.dump(output, f, indent=4, ensure_ascii=False)

    def get_spec_path(self, doc_id: str):
        """Build the server-relative folder path for a specification ID.

        ``"103 666-2"`` becomes ``"103600_103699/10366602"``: the hundred-range
        bucket of the specification number, then the number followed by the
        part padded to two digits.

        Raises:
            ValueError: if the number before the first dash is not an integer.
        """
        number, *parts = (chunk.strip() for chunk in doc_id.split("-"))
        number = number.replace(" ", "")
        lower = int(number) - int(number) % 100
        # Additional dash-separated sub-parts are padded like the first one;
        # presumably this matches the server's folder naming - TODO confirm.
        spec_folder = number + "".join(part.zfill(2) for part in parts if part)
        return f"{lower}_{lower + 99}/{spec_folder}"

    def get_docs_from_url(self, url):
        """Return the link texts of a directory listing, minus the first link.

        Best effort: any failure (network error, HTTP error status, parse
        error) is printed and an empty list is returned.
        """
        try:
            response = requests.get(url, verify=False, timeout=15)
            # Do not mistake an HTTP error page for a directory listing.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # The first anchor is skipped (presumably the parent-directory link).
            docs = [item.get_text() for item in soup.find_all("a")][1:]
            return docs
        except Exception as e:
            print(f"Error accessing {url}: {e}")
            return []

    def search_document(self, doc_id: str):
        """Return the PDF URL for ``doc_id`` or a "not found" message.

        The index is consulted first (exact key, then first key starting with
        ``doc_id``). Otherwise the last entry of the specification's directory
        listing is opened and its first ``.pdf`` file is returned.
        """
        # Example : 103 666[-2 opt]
        original = doc_id

        if original in self.indexer:
            return self.indexer[original]
        for doc in self.indexer:
            if doc.startswith(original):
                return self.indexer[doc]

        not_found = f"Specification {doc_id} not found"

        # IDs that are not ETSI-style numbers (e.g. "31.102") cannot be mapped
        # to a folder; report them as not found instead of raising.
        try:
            spec_path = self.get_spec_path(original)
        except ValueError:
            return not_found

        url = f"{self.main_url}/{spec_path}/"

        releases = self.get_docs_from_url(url)
        # An empty listing means an unknown specification or a failed request.
        if not releases:
            return not_found

        # Assumes the listing is ordered oldest-to-newest - TODO confirm.
        latest = releases[-1]
        files = self.get_docs_from_url(url + latest)
        for f in files:
            if f.endswith(".pdf"):
                return url + latest + "/" + f

        return not_found
|
205 |
|
206 |
@app.get("/")
async def main_menu():
    # Serve the front-end page from the templates directory.
    index_page = os.path.join("templates", "index.html")
    return FileResponse(index_page)
|
209 |
|
210 |
+
# Shared finder instances, created once at import time; the request handlers pick one per document ID.
finder_doc = DocFinder()
|
211 |
+
finder_spec = SpecFinder()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
+
@app.post("/find")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
def find_document(request: DocRequest):
|
215 |
start_time = time.time()
|
216 |
+
finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
|
217 |
result = finder.search_document(request.doc_id)
|
218 |
|
219 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
|
|
221 |
doc_id=request.doc_id,
|
222 |
url=result,
|
223 |
search_time=time.time() - start_time
|
224 |
+
) if not isinstance(result, list) else result
|
225 |
else:
|
226 |
raise HTTPException(status_code=404, detail=result)
|
227 |
|
228 |
+
@app.post("/batch", response_model=BatchDocResponse)
def find_documents_batch(request: BatchDocRequest):
    """Look up every ID in ``request.doc_ids`` and report found and missing ones.

    IDs starting with a letter go to ``finder_doc``; all others go to
    ``finder_spec``. An ID is "found" when the finder's result does not
    contain one of the failure phrases checked below.

    Returns:
        BatchDocResponse with ``results`` (ID -> URL), ``missing`` (IDs not
        found) and ``search_time`` (seconds spent on the whole batch).
    """
    start_time = time.time()

    results = {}
    missing = []

    for doc_id in request.doc_ids:
        # A blank ID has no first character to route on and cannot match
        # anything; report it as missing instead of failing the whole batch.
        if not doc_id.strip():
            missing.append(doc_id)
            continue

        finder = finder_doc if doc_id[0].isalpha() else finder_spec
        result = finder.search_document(doc_id)
        if "not found" not in result and "Could not" not in result and "Unable" not in result:
            results[doc_id] = result
        else:
            missing.append(doc_id)

    return BatchDocResponse(
        results=results,
        missing=missing,
        search_time=time.time() - start_time
    )
|
static/script.js
CHANGED
@@ -2,21 +2,21 @@
|
|
2 |
const dynamicDesc = document.getElementById("dynamicDesc");
|
3 |
const dynamicTitle = document.getElementById("dynamicTitle");
|
4 |
|
5 |
-
|
6 |
-
|
7 |
// const keywordModeBtn = document.getElementById("keyword-mode-btn");
|
8 |
|
9 |
const singleInput = document.querySelector('.single-input');
|
10 |
-
|
11 |
// const keywordSearchInput = document.querySelector(".keyword-input");
|
12 |
|
13 |
const docIdInput = document.getElementById('doc-id');
|
14 |
-
|
15 |
// const keywordInput = document.getElementById("keywords");
|
16 |
|
17 |
const searchBtn = document.getElementById('search-btn');
|
18 |
const batchSearchBtn = document.getElementById('batch-search-btn');
|
19 |
-
const keywordSearchBtn = document.getElementById("keyword-search-btn");
|
20 |
|
21 |
const loader = document.getElementById('loader');
|
22 |
const resultsContainer = document.getElementById('results-container');
|
@@ -25,31 +25,31 @@ const resultsStats = document.getElementById('results-stats');
|
|
25 |
const errorMessage = document.getElementById('error-message');
|
26 |
|
27 |
// Search mode toggle
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
|
33 |
-
//
|
34 |
-
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
//
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
|
46 |
-
//
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
//
|
51 |
-
|
52 |
-
|
53 |
|
54 |
// keywordModeBtn.addEventListener('click', () => {
|
55 |
// dynamicTitle.textContent = "Search 3GPP specifications";
|
@@ -134,48 +134,48 @@ searchBtn.addEventListener('click', async () => {
|
|
134 |
});
|
135 |
|
136 |
// Batch document search
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
|
153 |
-
|
154 |
-
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
|
165 |
-
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
|
180 |
// Display single result
|
181 |
function displaySingleResult(data) {
|
@@ -246,45 +246,45 @@ function displaySingleNotFound(docId, message) {
|
|
246 |
// resultsContainer.style.display = 'block';
|
247 |
// }
|
248 |
|
249 |
-
//
|
250 |
-
|
251 |
-
|
252 |
|
253 |
-
//
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
|
269 |
-
//
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
|
282 |
-
|
283 |
-
|
284 |
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
|
289 |
// Show loader
|
290 |
function showLoader() {
|
|
|
2 |
const dynamicDesc = document.getElementById("dynamicDesc");
|
3 |
const dynamicTitle = document.getElementById("dynamicTitle");
|
4 |
|
5 |
+
const singleModeBtn = document.getElementById('single-mode-btn');
|
6 |
+
const batchModeBtn = document.getElementById('batch-mode-btn');
|
7 |
// const keywordModeBtn = document.getElementById("keyword-mode-btn");
|
8 |
|
9 |
const singleInput = document.querySelector('.single-input');
|
10 |
+
const batchInput = document.querySelector('.batch-input');
|
11 |
// const keywordSearchInput = document.querySelector(".keyword-input");
|
12 |
|
13 |
const docIdInput = document.getElementById('doc-id');
|
14 |
+
const batchIdsInput = document.getElementById('batch-ids');
|
15 |
// const keywordInput = document.getElementById("keywords");
|
16 |
|
17 |
const searchBtn = document.getElementById('search-btn');
|
18 |
const batchSearchBtn = document.getElementById('batch-search-btn');
|
19 |
+
// const keywordSearchBtn = document.getElementById("keyword-search-btn");
|
20 |
|
21 |
const loader = document.getElementById('loader');
|
22 |
const resultsContainer = document.getElementById('results-container');
|
|
|
25 |
const errorMessage = document.getElementById('error-message');
|
26 |
|
27 |
// Search mode toggle
|
28 |
+
singleModeBtn.addEventListener('click', () => {
    // Header text for single-document mode
    dynamicTitle.textContent = "Find ETSI Documents";
    dynamicDesc.textContent = "Enter a SET/SCP/TS document ID to locate the document in the ETSI DocBox server.";

    // Mark only this mode's button as active
    batchModeBtn.classList.remove('active');
    // keywordModeBtn.classList.remove("active");
    singleModeBtn.classList.add('active');

    // Show the single-ID field and hide the batch textarea
    batchInput.style.display = 'none';
    // keywordSearchInput.style.display = "none";
    singleInput.style.display = 'block';
});
|
40 |
|
41 |
+
batchModeBtn.addEventListener('click', () => {
    // Header text for batch mode
    dynamicTitle.textContent = "Find multiple ETSI Documents";
    dynamicDesc.textContent = "Enter a list of SET/SCP/TS document ID to locate all specified documents in the ETSI DocBox server.";

    // Mark only this mode's button as active
    singleModeBtn.classList.remove('active');
    //keywordModeBtn.classList.remove("active");
    batchModeBtn.classList.add('active');

    // Show the batch textarea and hide the single-ID field
    singleInput.style.display = 'none';
    //keywordSearchInput.style.display = "none";
    batchInput.style.display = 'block';
});
|
53 |
|
54 |
// keywordModeBtn.addEventListener('click', () => {
|
55 |
// dynamicTitle.textContent = "Search 3GPP specifications";
|
|
|
134 |
});
|
135 |
|
136 |
// Batch document search
|
137 |
+
batchSearchBtn.addEventListener('click', async () => {
    // Read the textarea; an all-whitespace value counts as empty.
    const rawInput = batchIdsInput.value.trim();
    if (rawInput === '') {
        showError('Please enter at least one document ID');
        return;
    }

    // One ID per line, ignoring blank lines.
    const docIds = [];
    for (const line of rawInput.split('\n')) {
        const id = line.trim();
        if (id !== '') {
            docIds.push(id);
        }
    }

    if (docIds.length === 0) {
        showError('Please enter at least one valid document ID');
        return;
    }

    showLoader();
    hideError();

    try {
        const requestOptions = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ doc_ids: docIds })
        };
        const response = await fetch(`/batch`, requestOptions);

        // The body is parsed before the status check, as in the original flow.
        const data = await response.json();

        if (!response.ok) {
            showError('Error processing batch request');
        } else {
            displayBatchResults(data);
        }
    } catch (error) {
        showError('Error connecting to the server. Please check if the API is running.');
        console.error('Error:', error);
    } finally {
        // Always remove the spinner, whether the request succeeded or failed.
        hideLoader();
    }
});
|
179 |
|
180 |
// Display single result
|
181 |
function displaySingleResult(data) {
|
|
|
246 |
// resultsContainer.style.display = 'block';
|
247 |
// }
|
248 |
|
249 |
+
// Display batch results
|
250 |
+
/**
 * Render a /batch response into the results list.
 *
 * @param {{results: Object<string, string>, missing: string[], search_time: number}} data
 *   `results` maps each found document ID to its URL, `missing` lists the IDs
 *   that were not found, `search_time` is the elapsed time in seconds.
 */
function displayBatchResults(data) {
    resultsList.innerHTML = '';

    // Build one result card. Every dynamic value is assigned through
    // textContent / properties, never interpolated into innerHTML, so a
    // document ID or URL containing markup cannot inject HTML into the page.
    const buildItem = (docId, statusClass, statusText, url) => {
        const item = document.createElement('div');
        item.className = 'result-item';

        const header = document.createElement('div');
        header.className = 'result-header';

        const idEl = document.createElement('div');
        idEl.className = 'result-id';
        idEl.textContent = docId;

        const statusEl = document.createElement('div');
        statusEl.className = `result-status ${statusClass}`;
        statusEl.textContent = statusText;

        header.appendChild(idEl);
        header.appendChild(statusEl);
        item.appendChild(header);

        // Only found documents carry a URL row.
        if (url !== undefined) {
            const urlEl = document.createElement('div');
            urlEl.className = 'result-url';

            const link = document.createElement('a');
            link.href = url;
            link.target = '_blank';
            // Prevent the opened tab from reaching back through window.opener.
            link.rel = 'noopener noreferrer';
            link.textContent = url;

            urlEl.appendChild(link);
            item.appendChild(urlEl);
        }

        return item;
    };

    // Found documents
    Object.entries(data.results).forEach(([docId, url]) => {
        resultsList.appendChild(buildItem(docId, 'status-found', 'Found', url));
    });

    // Not found documents
    data.missing.forEach(docId => {
        resultsList.appendChild(buildItem(docId, 'status-not-found', 'Not Found'));
    });

    const foundCount = Object.keys(data.results).length;
    const totalCount = foundCount + data.missing.length;

    resultsStats.textContent = `Found ${foundCount} of ${totalCount} documents in ${data.search_time.toFixed(2)} seconds`;
    resultsContainer.style.display = 'block';
}
|
288 |
|
289 |
// Show loader
|
290 |
function showLoader() {
|
templates/index.html
CHANGED
@@ -20,31 +20,31 @@
|
|
20 |
<div class="search-container">
|
21 |
<div class="search-header">
|
22 |
<h2 id="dynamicTitle">Find ETSI Documents</h2>
|
23 |
-
<p id="dynamicDesc">Enter a SET/SCP document
|
24 |
</div>
|
25 |
|
26 |
<div class="search-mode">
|
27 |
<button id="single-mode-btn" class="active">Single Document</button>
|
28 |
-
|
29 |
-
|
30 |
</div>
|
31 |
|
32 |
<div class="search-form">
|
33 |
<div class="input-group single-input">
|
34 |
<label for="doc-id">Document ID</label>
|
35 |
<div class="input-field">
|
36 |
-
<input type="text" id="doc-id" placeholder="Enter SET/SCP document ID (e.g. SETREQ(15)0015451r1, SCP(12)15151,
|
37 |
<button id="search-btn" class="btn">Search</button>
|
38 |
</div>
|
39 |
</div>
|
40 |
|
41 |
-
|
42 |
<label for="batch-ids">Document IDs or Specification IDs (one per line)</label>
|
43 |
<textarea id="batch-ids" placeholder="Enter document IDs or specification IDs, one per line (e.g., S1-123456, C2-987654, 31.102)"></textarea>
|
44 |
<div class="hint">Enter one document ID per line</div>
|
45 |
<button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
|
46 |
</div>
|
47 |
-
|
48 |
<div class="input-group keyword-input">
|
49 |
<label for="keywords">Keywords</label>
|
50 |
<div class="input-field">
|
|
|
20 |
<div class="search-container">
|
21 |
<div class="search-header">
|
22 |
<h2 id="dynamicTitle">Find ETSI Documents</h2>
|
23 |
+
<p id="dynamicDesc">Enter a SET/SCP/TS document ID to locate the document in the ETSI DocBox server.</p>
|
24 |
</div>
|
25 |
|
26 |
<div class="search-mode">
|
27 |
<button id="single-mode-btn" class="active">Single Document</button>
|
28 |
+
<button id="batch-mode-btn">Batch Search</button>
|
29 |
+
<!--<button id="keyword-mode-btn">Keyword Search</button> -->
|
30 |
</div>
|
31 |
|
32 |
<div class="search-form">
|
33 |
<div class="input-group single-input">
|
34 |
<label for="doc-id">Document ID</label>
|
35 |
<div class="input-field">
|
36 |
+
<input type="text" id="doc-id" placeholder="Enter SET/SCP/TS document ID (e.g. SETREQ(15)0015451r1, SCP(12)15151, 103 666-2)">
|
37 |
<button id="search-btn" class="btn">Search</button>
|
38 |
</div>
|
39 |
</div>
|
40 |
|
41 |
+
<div class="input-group batch-input">
|
42 |
<label for="batch-ids">Document IDs or Specification IDs (one per line)</label>
|
43 |
<textarea id="batch-ids" placeholder="Enter document IDs or specification IDs, one per line (e.g., S1-123456, C2-987654, 31.102)"></textarea>
|
44 |
<div class="hint">Enter one document ID per line</div>
|
45 |
<button id="batch-search-btn" class="btn" style="margin-top: 10px;">Search All</button>
|
46 |
</div>
|
47 |
+
<!--
|
48 |
<div class="input-group keyword-input">
|
49 |
<label for="keywords">Keywords</label>
|
50 |
<div class="input-field">
|