nsarrazin HF Staff committed on
Commit
54e4da8
·
unverified ·
1 Parent(s): d5e6bbd

fix(websearch): block protocols on playwright (#1579)

Browse files

* fix(websearch): block protocols on playwright

* fix: only allow https

src/lib/server/websearch/scrape/playwright.ts CHANGED
@@ -68,7 +68,18 @@ export async function withPage<T>(
68
 
69
  try {
70
  const page = await ctx.newPage();
71
- env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page));
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  const res = await page
74
  .goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
@@ -78,9 +89,8 @@ export async function withPage<T>(
78
  );
79
  });
80
 
81
- // await needed here so that we don't close the context before the callback is done
82
  return await callback(page, res ?? undefined);
83
  } finally {
84
- ctx.close();
85
  }
86
  }
 
68
 
69
  try {
70
  const page = await ctx.newPage();
71
+ if (env.PLAYWRIGHT_ADBLOCKER === "true") {
72
+ await blocker.enableBlockingInPage(page);
73
+ }
74
+
75
+ await page.route("**", (route, request) => {
76
+ const requestUrl = request.url();
77
+ if (!requestUrl.startsWith("https://")) {
78
+ logger.warn(`Blocked request to: ${requestUrl}`);
79
+ return route.abort();
80
+ }
81
+ return route.continue();
82
+ });
83
 
84
  const res = await page
85
  .goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
 
89
  );
90
  });
91
 
 
92
  return await callback(page, res ?? undefined);
93
  } finally {
94
+ await ctx.close();
95
  }
96
  }