Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
fix(websearch): block protocols on playwright (#1579)
Browse files
* fix(websearch): block protocols on playwright
* fix: only allow https
src/lib/server/websearch/scrape/playwright.ts
CHANGED
@@ -68,7 +68,18 @@ export async function withPage<T>(
|
|
68 |
|
69 |
try {
|
70 |
const page = await ctx.newPage();
|
71 |
-
env.PLAYWRIGHT_ADBLOCKER === "true"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
const res = await page
|
74 |
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
|
@@ -78,9 +89,8 @@ export async function withPage<T>(
|
|
78 |
);
|
79 |
});
|
80 |
|
81 |
-
// await needed here so that we don't close the context before the callback is done
|
82 |
return await callback(page, res ?? undefined);
|
83 |
} finally {
|
84 |
-
ctx.close();
|
85 |
}
|
86 |
}
|
|
|
68 |
|
69 |
try {
|
70 |
const page = await ctx.newPage();
|
71 |
+
if (env.PLAYWRIGHT_ADBLOCKER === "true") {
|
72 |
+
await blocker.enableBlockingInPage(page);
|
73 |
+
}
|
74 |
+
|
75 |
+
await page.route("**", (route, request) => {
|
76 |
+
const requestUrl = request.url();
|
77 |
+
if (!requestUrl.startsWith("https://")) {
|
78 |
+
logger.warn(`Blocked request to: ${requestUrl}`);
|
79 |
+
return route.abort();
|
80 |
+
}
|
81 |
+
return route.continue();
|
82 |
+
});
|
83 |
|
84 |
const res = await page
|
85 |
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
|
|
|
89 |
);
|
90 |
});
|
91 |
|
|
|
92 |
return await callback(page, res ?? undefined);
|
93 |
} finally {
|
94 |
+
await ctx.close();
|
95 |
}
|
96 |
}
|