Upload 81 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +1 -0
- .github/ISSUE_TEMPLATE/bug_report.md +27 -0
- .github/ISSUE_TEMPLATE/custom.md +7 -0
- .github/ISSUE_TEMPLATE/feature_request.md +19 -0
- .gitignore +34 -0
- .prettierignore +38 -0
- .prettierrc.js +12 -0
- CONTRIBUTING.md +39 -0
- LICENSE +21 -0
- README.md +117 -11
- app.dockerfile +15 -0
- backend.dockerfile +18 -0
- config.toml +11 -0
- docker-compose.yaml +45 -0
- docs/architecture/README.md +11 -0
- docs/architecture/WORKING.md +19 -0
- package.json +36 -0
- searxng-settings.yml +2356 -0
- searxng.dockerfile +3 -0
- src/Perplexica - Shortcut.lnk +0 -0
- src/agents/academicSearchAgent.ts +265 -0
- src/agents/imageSearchAgent.ts +84 -0
- src/agents/redditSearchAgent.ts +260 -0
- src/agents/videoSearchAgent.ts +90 -0
- src/agents/webSearchAgent.ts +261 -0
- src/agents/wolframAlphaSearchAgent.ts +219 -0
- src/agents/writingAssistant.ts +90 -0
- src/agents/youtubeSearchAgent.ts +261 -0
- src/app.ts +30 -0
- src/config.ts +69 -0
- src/lib/providers.ts +157 -0
- src/lib/searxng.ts +47 -0
- src/routes/config.ts +63 -0
- src/routes/images.ts +46 -0
- src/routes/index.ts +14 -0
- src/routes/models.ts +24 -0
- src/routes/videos.ts +46 -0
- src/utils/computeSimilarity.ts +17 -0
- src/utils/formatHistory.ts +9 -0
- src/utils/logger.ts +22 -0
- src/websocket/connectionManager.ts +86 -0
- src/websocket/index.ts +8 -0
- src/websocket/messageHandler.ts +109 -0
- src/websocket/websocketServer.ts +16 -0
- tsconfig.json +17 -0
- ui/.env.example +2 -0
- ui/.eslintrc.json +3 -0
- ui/.gitignore +34 -0
- ui/.prettierrc.js +11 -0
- ui/app/discover/page.tsx +5 -0
.dockerignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
**/node_modules
|
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Bug report
|
3 |
+
about: Create an issue to help us fix bugs
|
4 |
+
title: ''
|
5 |
+
labels: bug
|
6 |
+
assignees: ''
|
7 |
+
---
|
8 |
+
|
9 |
+
**Describe the bug**
|
10 |
+
A clear and concise description of what the bug is.
|
11 |
+
|
12 |
+
**To Reproduce**
|
13 |
+
Steps to reproduce the behavior:
|
14 |
+
|
15 |
+
1. Go to '...'
|
16 |
+
2. Click on '....'
|
17 |
+
3. Scroll down to '....'
|
18 |
+
4. See error
|
19 |
+
|
20 |
+
**Expected behavior**
|
21 |
+
A clear and concise description of what you expected to happen.
|
22 |
+
|
23 |
+
**Screenshots**
|
24 |
+
If applicable, add screenshots to help explain your problem.
|
25 |
+
|
26 |
+
**Additional context**
|
27 |
+
Add any other context about the problem here.
|
.github/ISSUE_TEMPLATE/custom.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Custom issue template
|
3 |
+
about: Describe this issue template's purpose here.
|
4 |
+
title: ''
|
5 |
+
labels: ''
|
6 |
+
assignees: ''
|
7 |
+
---
|
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Feature request
|
3 |
+
about: Suggest an idea for this project
|
4 |
+
title: ''
|
5 |
+
labels: enhancement
|
6 |
+
assignees: ''
|
7 |
+
---
|
8 |
+
|
9 |
+
**Is your feature request related to a problem? Please describe.**
|
10 |
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
11 |
+
|
12 |
+
**Describe the solution you'd like**
|
13 |
+
A clear and concise description of what you want to happen.
|
14 |
+
|
15 |
+
**Describe alternatives you've considered**
|
16 |
+
A clear and concise description of any alternative solutions or features you've considered.
|
17 |
+
|
18 |
+
**Additional context**
|
19 |
+
Add any other context or screenshots about the feature request here.
|
.gitignore
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Node.js
|
2 |
+
node_modules/
|
3 |
+
npm-debug.log
|
4 |
+
yarn-error.log
|
5 |
+
|
6 |
+
# Build output
|
7 |
+
/.next/
|
8 |
+
/out/
|
9 |
+
|
10 |
+
# IDE/Editor specific
|
11 |
+
.vscode/
|
12 |
+
.idea/
|
13 |
+
*.iml
|
14 |
+
|
15 |
+
# Environment variables
|
16 |
+
.env
|
17 |
+
.env.local
|
18 |
+
.env.development.local
|
19 |
+
.env.test.local
|
20 |
+
.env.production.local
|
21 |
+
|
22 |
+
# Config files
|
23 |
+
config.toml
|
24 |
+
|
25 |
+
# Log files
|
26 |
+
logs/
|
27 |
+
*.log
|
28 |
+
|
29 |
+
# Testing
|
30 |
+
/coverage/
|
31 |
+
|
32 |
+
# Miscellaneous
|
33 |
+
.DS_Store
|
34 |
+
Thumbs.db
|
.prettierignore
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore all files in the node_modules directory
|
2 |
+
node_modules
|
3 |
+
|
4 |
+
# Ignore all files in the .next directory (Next.js build output)
|
5 |
+
.next
|
6 |
+
|
7 |
+
# Ignore all files in the .out directory (TypeScript build output)
|
8 |
+
.out
|
9 |
+
|
10 |
+
# Ignore all files in the .cache directory (Prettier cache)
|
11 |
+
.cache
|
12 |
+
|
13 |
+
# Ignore all files in the .vscode directory (Visual Studio Code settings)
|
14 |
+
.vscode
|
15 |
+
|
16 |
+
# Ignore all files in the .idea directory (IntelliJ IDEA settings)
|
17 |
+
.idea
|
18 |
+
|
19 |
+
# Ignore all files in the dist directory (build output)
|
20 |
+
dist
|
21 |
+
|
22 |
+
# Ignore all files in the build directory (build output)
|
23 |
+
build
|
24 |
+
|
25 |
+
# Ignore all files in the coverage directory (test coverage reports)
|
26 |
+
coverage
|
27 |
+
|
28 |
+
# Ignore all files with the .log extension
|
29 |
+
*.log
|
30 |
+
|
31 |
+
# Ignore all files with the .tmp extension
|
32 |
+
*.tmp
|
33 |
+
|
34 |
+
# Ignore all files with the .swp extension
|
35 |
+
*.swp
|
36 |
+
|
37 |
+
# Ignore all files with the .DS_Store extension (macOS specific)
|
38 |
+
.DS_Store
|
.prettierrc.js
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/** @type {import("prettier").Config} */
|
2 |
+
|
3 |
+
const config = {
|
4 |
+
printWidth: 80,
|
5 |
+
trailingComma: 'all',
|
6 |
+
endOfLine: 'auto',
|
7 |
+
singleQuote: true,
|
8 |
+
tabWidth: 2,
|
9 |
+
semi: true,
|
10 |
+
};
|
11 |
+
|
12 |
+
module.exports = config;
|
CONTRIBUTING.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# How to Contribute to Perplexica
|
2 |
+
|
3 |
+
Hey there, thanks for deciding to contribute to Perplexica. Anything you help with will support the development of Perplexica and will make it better. Let's walk you through the key aspects to ensure your contributions are effective and in harmony with the project's setup.
|
4 |
+
|
5 |
+
## Project Structure
|
6 |
+
|
7 |
+
Perplexica's design consists of two main domains:
|
8 |
+
|
9 |
+
- **Frontend (`ui` directory)**: This is a Next.js application holding all user interface components. It's a self-contained environment that manages everything the user interacts with.
|
10 |
+
- **Backend (root and `src` directory)**: The backend logic is situated in the `src` folder, but the root directory holds the main `package.json` for backend dependency management.
|
11 |
+
|
12 |
+
## Setting Up Your Environment
|
13 |
+
|
14 |
+
Before diving into coding, setting up your local environment is key. Here's what you need to do:
|
15 |
+
|
16 |
+
### Backend
|
17 |
+
|
18 |
+
1. In the root directory, locate the `sample.config.toml` file.
|
19 |
+
2. Rename it to `config.toml` and fill in the necessary configuration fields specific to the backend.
|
20 |
+
3. Run `npm install` to install dependencies.
|
21 |
+
4. Use `npm run dev` to start the backend in development mode.
|
22 |
+
|
23 |
+
### Frontend
|
24 |
+
|
25 |
+
1. Navigate to the `ui` folder and repeat the process of renaming `.env.example` to `.env`, making sure to provide the frontend-specific variables.
|
26 |
+
2. Execute `npm install` within the `ui` directory to get the frontend dependencies ready.
|
27 |
+
3. Launch the frontend development server with `npm run dev`.
|
28 |
+
|
29 |
+
**Please note**: Docker configurations are present for setting up production environments, whereas `npm run dev` is used for development purposes.
|
30 |
+
|
31 |
+
## Coding and Contribution Practices
|
32 |
+
|
33 |
+
Before committing changes:
|
34 |
+
|
35 |
+
1. Ensure that your code functions correctly by thorough testing.
|
36 |
+
2. Always run `npm run format:write` to format your code according to the project's coding standards. This helps maintain consistency and code quality.
|
37 |
+
3. We currently do not have a code of conduct, but it is in the works. In the meantime, please be mindful of how you engage with the project and its community.
|
38 |
+
|
39 |
+
Following these steps will help maintain the integrity of Perplexica's codebase and facilitate a smoother integration of your valuable contributions. Thank you for your support and commitment to improving Perplexica.
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 ItzCrazyKns
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,11 +1,117 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🚀 Perplexica - An AI-powered search engine 🔎 <!-- omit in toc -->
|
2 |
+
|
3 |
+

|
4 |
+
|
5 |
+
## Table of Contents <!-- omit in toc -->
|
6 |
+
|
7 |
+
- [Overview](#overview)
|
8 |
+
- [Preview](#preview)
|
9 |
+
- [Features](#features)
|
10 |
+
- [Installation](#installation)
|
11 |
+
- [Getting Started with Docker (Recommended)](#getting-started-with-docker-recommended)
|
12 |
+
- [Non-Docker Installation](#non-docker-installation)
|
13 |
+
- [One-Click Deployment](#one-click-deployment)
|
14 |
+
- [Upcoming Features](#upcoming-features)
|
15 |
+
- [Support Us](#support-us)
|
16 |
+
- [Contribution](#contribution)
|
17 |
+
- [Help and Support](#help-and-support)
|
18 |
+
|
19 |
+
## Overview
|
20 |
+
|
21 |
+
Perplexica is an open-source AI-powered searching tool or an AI-powered search engine that goes deep into the internet to find answers. Inspired by Perplexity AI, it's an open-source option that not just searches the web but understands your questions. It uses advanced machine learning algorithms like similarity searching and embeddings to refine results and provides clear answers with sources cited.
|
22 |
+
|
23 |
+
Using SearxNG to stay current and fully open source, Perplexica ensures you always get the most up-to-date information without compromising your privacy.
|
24 |
+
|
25 |
+
Want to know more about its architecture and how it works? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md).
|
26 |
+
|
27 |
+
## Preview
|
28 |
+
|
29 |
+

|
30 |
+
|
31 |
+
## Features
|
32 |
+
|
33 |
+
- **Local LLMs**: You can make use local LLMs such as Llama3 and Mixtral using Ollama.
|
34 |
+
- **Two Main Modes:**
|
35 |
+
- **Copilot Mode:** (In development) Boosts search by generating different queries to find more relevant internet sources. Like normal search instead of just using the context by SearxNG, it visits the top matches and tries to find relevant sources to the user's query directly from the page.
|
36 |
+
- **Normal Mode:** Processes your query and performs a web search.
|
37 |
+
- **Focus Modes:** Special modes to better answer specific types of questions. Perplexica currently has 6 focus modes:
|
38 |
+
- **All Mode:** Searches the entire web to find the best results.
|
39 |
+
- **Writing Assistant Mode:** Helpful for writing tasks that does not require searching the web.
|
40 |
+
- **Academic Search Mode:** Finds articles and papers, ideal for academic research.
|
41 |
+
- **YouTube Search Mode:** Finds YouTube videos based on the search query.
|
42 |
+
- **Wolfram Alpha Search Mode:** Answers queries that need calculations or data analysis using Wolfram Alpha.
|
43 |
+
- **Reddit Search Mode:** Searches Reddit for discussions and opinions related to the query.
|
44 |
+
- **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in a index. Unlike them, Perplexica uses SearxNG, a metasearch engine to get the results and rerank and get the most relevant source out of it, ensuring you always get the latest information without the overhead of daily data updates.
|
45 |
+
|
46 |
+
It has many more features like image and video search. Some of the planned features are mentioned in [upcoming features](#upcoming-features).
|
47 |
+
|
48 |
+
## Installation
|
49 |
+
|
50 |
+
There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. Using Docker is highly recommended.
|
51 |
+
|
52 |
+
### Getting Started with Docker (Recommended)
|
53 |
+
|
54 |
+
1. Ensure Docker is installed and running on your system.
|
55 |
+
2. Clone the Perplexica repository:
|
56 |
+
|
57 |
+
```bash
|
58 |
+
git clone https://github.com/ItzCrazyKns/Perplexica.git
|
59 |
+
```
|
60 |
+
|
61 |
+
3. After cloning, navigate to the directory containing the project files.
|
62 |
+
|
63 |
+
4. Rename the `sample.config.toml` file to `config.toml`. For Docker setups, you need only fill in the following fields:
|
64 |
+
|
65 |
+
- `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**.
|
66 |
+
- `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**.
|
67 |
+
- `GROQ`: Your Groq API key. **You only need to fill this if you wish to use Groq's hosted models**
|
68 |
+
|
69 |
+
**Note**: You can change these after starting Perplexica from the settings dialog.
|
70 |
+
|
71 |
+
- `SIMILARITY_MEASURE`: The similarity measure to use (This is filled by default; you can leave it as is if you are unsure about it.)
|
72 |
+
|
73 |
+
5. Ensure you are in the directory containing the `docker-compose.yaml` file and execute:
|
74 |
+
|
75 |
+
```bash
|
76 |
+
docker compose up -d
|
77 |
+
```
|
78 |
+
|
79 |
+
6. Wait a few minutes for the setup to complete. You can access Perplexica at http://localhost:3000 in your web browser.
|
80 |
+
|
81 |
+
**Note**: After the containers are built, you can start Perplexica directly from Docker without having to open a terminal.
|
82 |
+
|
83 |
+
### Non-Docker Installation
|
84 |
+
|
85 |
+
1. Clone the repository and rename the `sample.config.toml` file to `config.toml` in the root directory. Ensure you complete all required fields in this file.
|
86 |
+
2. Rename the `.env.example` file to `.env` in the `ui` folder and fill in all necessary fields.
|
87 |
+
3. After populating the configuration and environment files, run `npm i` in both the `ui` folder and the root directory.
|
88 |
+
4. Install the dependencies and then execute `npm run build` in both the `ui` folder and the root directory.
|
89 |
+
5. Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory.
|
90 |
+
|
91 |
+
**Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies.
|
92 |
+
|
93 |
+
## One-Click Deployment
|
94 |
+
|
95 |
+
[](https://repocloud.io/details/?app_id=267)
|
96 |
+
|
97 |
+
## Upcoming Features
|
98 |
+
|
99 |
+
- [ ] Finalizing Copilot Mode
|
100 |
+
- [x] Add settings page
|
101 |
+
- [x] Adding support for local LLMs
|
102 |
+
- [ ] Adding Discover and History Saving features
|
103 |
+
- [x] Introducing various Focus Modes
|
104 |
+
|
105 |
+
## Support Us
|
106 |
+
|
107 |
+
If you find Perplexica useful, consider giving us a star on GitHub. This helps more people discover Perplexica and supports the development of new features. Your support is appreciated.
|
108 |
+
|
109 |
+
## Contribution
|
110 |
+
|
111 |
+
Perplexica is built on the idea that AI and large language models should be easy for everyone to use. If you find bugs or have ideas, please share them in via GitHub Issues. For more information on contributing to Perplexica you can read the [CONTRIBUTING.md](CONTRIBUTING.md) file to learn more about Perplexica and how you can contribute to it.
|
112 |
+
|
113 |
+
## Help and Support
|
114 |
+
|
115 |
+
If you have any questions or feedback, please feel free to reach out to us. You can create an issue on GitHub or join our Discord server. There, you can connect with other users, share your experiences and reviews, and receive more personalized help. [Click here](https://discord.gg/EFwsmQDgAu) to join the Discord server. To discuss matters outside of regular support, feel free to contact me on Discord at `itzcrazykns`.
|
116 |
+
|
117 |
+
Thank you for exploring Perplexica, the AI-powered search engine designed to enhance your search experience. We are constantly working to improve Perplexica and expand its capabilities. We value your feedback and contributions which help us make Perplexica even better. Don't forget to check back for updates and new features!
|
app.dockerfile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM node:alpine
|
2 |
+
|
3 |
+
ARG NEXT_PUBLIC_WS_URL
|
4 |
+
ARG NEXT_PUBLIC_API_URL
|
5 |
+
ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
|
6 |
+
ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
|
7 |
+
|
8 |
+
WORKDIR /home/perplexica
|
9 |
+
|
10 |
+
COPY ui /home/perplexica/
|
11 |
+
|
12 |
+
RUN yarn install
|
13 |
+
RUN yarn build
|
14 |
+
|
15 |
+
CMD ["yarn", "start"]
|
backend.dockerfile
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM node:alpine
|
2 |
+
|
3 |
+
ARG SEARXNG_API_URL
|
4 |
+
|
5 |
+
WORKDIR /home/perplexica
|
6 |
+
|
7 |
+
COPY src /home/perplexica/src
|
8 |
+
COPY tsconfig.json /home/perplexica/
|
9 |
+
COPY config.toml /home/perplexica/
|
10 |
+
COPY package.json /home/perplexica/
|
11 |
+
COPY yarn.lock /home/perplexica/
|
12 |
+
|
13 |
+
RUN sed -i "s|SEARXNG = \".*\"|SEARXNG = \"${SEARXNG_API_URL}\"|g" /home/perplexica/config.toml
|
14 |
+
|
15 |
+
RUN yarn install
|
16 |
+
RUN yarn build
|
17 |
+
|
18 |
+
CMD ["yarn", "start"]
|
config.toml
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[GENERAL]
|
2 |
+
PORT = 3001 # Port to run the server on
|
3 |
+
SIMILARITY_MEASURE = "cosine" # "cosine" or "dot"
|
4 |
+
|
5 |
+
[API_KEYS]
|
6 |
+
OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef
|
7 |
+
GROQ = "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"
|
8 |
+
|
9 |
+
[API_ENDPOINTS]
|
10 |
+
SEARXNG = "http://localhost:32768" # SearxNG API URL
|
11 |
+
OLLAMA = "" # Ollama API URL - http://host.docker.internal:11434
|
docker-compose.yaml
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
services:
|
2 |
+
searxng:
|
3 |
+
build:
|
4 |
+
context: .
|
5 |
+
dockerfile: searxng.dockerfile
|
6 |
+
expose:
|
7 |
+
- 4000
|
8 |
+
ports:
|
9 |
+
- 4000:8080
|
10 |
+
networks:
|
11 |
+
- perplexica-network
|
12 |
+
|
13 |
+
perplexica-backend:
|
14 |
+
build:
|
15 |
+
context: .
|
16 |
+
dockerfile: backend.dockerfile
|
17 |
+
args:
|
18 |
+
- SEARXNG_API_URL=http://searxng:8080
|
19 |
+
depends_on:
|
20 |
+
- searxng
|
21 |
+
expose:
|
22 |
+
- 3001
|
23 |
+
ports:
|
24 |
+
- 3001:3001
|
25 |
+
networks:
|
26 |
+
- perplexica-network
|
27 |
+
|
28 |
+
perplexica-frontend:
|
29 |
+
build:
|
30 |
+
context: .
|
31 |
+
dockerfile: app.dockerfile
|
32 |
+
args:
|
33 |
+
- NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
34 |
+
- NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
35 |
+
depends_on:
|
36 |
+
- perplexica-backend
|
37 |
+
expose:
|
38 |
+
- 3000
|
39 |
+
ports:
|
40 |
+
- 3000:3000
|
41 |
+
networks:
|
42 |
+
- perplexica-network
|
43 |
+
|
44 |
+
networks:
|
45 |
+
perplexica-network:
|
docs/architecture/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Perplexica's Architecture
|
2 |
+
|
3 |
+
Perplexica's architecture consists of the following key components:
|
4 |
+
|
5 |
+
1. **User Interface**: A web-based interface that allows users to interact with Perplexica for searching images, videos, and much more.
|
6 |
+
2. **Agent/Chains**: These components predict Perplexica's next actions, understand user queries, and decide whether a web search is necessary.
|
7 |
+
3. **SearXNG**: A metadata search engine used by Perplexica to search the web for sources.
|
8 |
+
4. **LLMs (Large Language Models)**: Utilized by agents and chains for tasks like understanding content, writing responses, and citing sources. Examples include Claude, GPTs, etc.
|
9 |
+
5. **Embedding Models**: To improve the accuracy of search results, embedding models re-rank the results using similarity search algorithms such as cosine similarity and dot product distance.
|
10 |
+
|
11 |
+
For a more detailed explanation of how these components work together, see [WORKING.md](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/WORKING.md).
|
docs/architecture/WORKING.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## How does Perplexica work?
|
2 |
+
|
3 |
+
Curious about how Perplexica works? Don't worry, we'll cover it here. Before we begin, make sure you've read about the architecture of Perplexica to ensure you understand what it's made up of. Haven't read it? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md).
|
4 |
+
|
5 |
+
We'll understand how Perplexica works by taking an example of a scenario where a user asks: "How does an A.C. work?". We'll break down the process into steps to make it easier to understand. The steps are as follows:
|
6 |
+
|
7 |
+
1. The message is sent via WS to the backend server where it invokes the chain. The chain will depend on your focus mode. For this example, let's assume we use the "webSearch" focus mode.
|
8 |
+
2. The chain is now invoked; first, the message is passed to another chain where it first predicts (using the chat history and the question) whether there is a need for sources or searching the web. If there is, it will generate a query (in accordance with the chat history) for searching the web that we'll take up later. If not, the chain will end there, and then the answer generator chain, also known as the response generator, will be started.
|
9 |
+
3. The query returned by the first chain is passed to SearXNG to search the web for information.
|
10 |
+
4. After the information is retrieved, it is based on keyword-based search. We then convert the information into embeddings and the query as well, then we perform a similarity search to find the most relevant sources to answer the query.
|
11 |
+
5. After all this is done, the sources are passed to the response generator. This chain takes all the chat history, the query, and the sources. It generates a response that is streamed to the UI.
|
12 |
+
|
13 |
+
### How are the answers cited?
|
14 |
+
|
15 |
+
The LLMs are prompted to do so. We've prompted them so well that they cite the answers themselves, and using some UI magic, we display it to the user.
|
16 |
+
|
17 |
+
### Image and Video Search
|
18 |
+
|
19 |
+
Image and video searches are conducted in a similar manner. A query is always generated first, then we search the web for images and videos that match the query. These results are then returned to the user.
|
package.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "perplexica-backend",
|
3 |
+
"version": "1.3.1",
|
4 |
+
"license": "MIT",
|
5 |
+
"author": "ItzCrazyKns",
|
6 |
+
"scripts": {
|
7 |
+
"start": "node dist/app.js",
|
8 |
+
"build": "tsc",
|
9 |
+
"dev": "nodemon src/app.ts",
|
10 |
+
"format": "prettier . --check",
|
11 |
+
"format:write": "prettier . --write"
|
12 |
+
},
|
13 |
+
"devDependencies": {
|
14 |
+
"@types/cors": "^2.8.17",
|
15 |
+
"@types/express": "^4.17.21",
|
16 |
+
"@types/readable-stream": "^4.0.11",
|
17 |
+
"nodemon": "^3.1.0",
|
18 |
+
"prettier": "^3.2.5",
|
19 |
+
"ts-node": "^10.9.2",
|
20 |
+
"typescript": "^5.4.3"
|
21 |
+
},
|
22 |
+
"dependencies": {
|
23 |
+
"@iarna/toml": "^2.2.5",
|
24 |
+
"@langchain/openai": "^0.0.25",
|
25 |
+
"axios": "^1.6.8",
|
26 |
+
"compute-cosine-similarity": "^1.1.0",
|
27 |
+
"compute-dot": "^1.1.0",
|
28 |
+
"cors": "^2.8.5",
|
29 |
+
"dotenv": "^16.4.5",
|
30 |
+
"express": "^4.19.2",
|
31 |
+
"langchain": "^0.1.30",
|
32 |
+
"winston": "^3.13.0",
|
33 |
+
"ws": "^8.16.0",
|
34 |
+
"zod": "^3.22.4"
|
35 |
+
}
|
36 |
+
}
|
searxng-settings.yml
ADDED
@@ -0,0 +1,2356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
general:
|
2 |
+
# Debug mode, only for development. Is overwritten by ${SEARXNG_DEBUG}
|
3 |
+
debug: false
|
4 |
+
# displayed name
|
5 |
+
instance_name: 'searxng'
|
6 |
+
# For example: https://example.com/privacy
|
7 |
+
privacypolicy_url: false
|
8 |
+
# use true to use your own donation page written in searx/info/en/donate.md
|
9 |
+
# use false to disable the donation link
|
10 |
+
donation_url: false
|
11 |
+
# mailto:[email protected]
|
12 |
+
contact_url: false
|
13 |
+
# record stats
|
14 |
+
enable_metrics: true
|
15 |
+
|
16 |
+
brand:
|
17 |
+
new_issue_url: https://github.com/searxng/searxng/issues/new
|
18 |
+
docs_url: https://docs.searxng.org/
|
19 |
+
public_instances: https://searx.space
|
20 |
+
wiki_url: https://github.com/searxng/searxng/wiki
|
21 |
+
issue_url: https://github.com/searxng/searxng/issues
|
22 |
+
# custom:
|
23 |
+
# maintainer: "Jon Doe"
|
24 |
+
# # Custom entries in the footer: [title]: [link]
|
25 |
+
# links:
|
26 |
+
# Uptime: https://uptime.searxng.org/history/darmarit-org
|
27 |
+
# About: "https://searxng.org"
|
28 |
+
|
29 |
+
search:
|
30 |
+
# Filter results. 0: None, 1: Moderate, 2: Strict
|
31 |
+
safe_search: 0
|
32 |
+
# Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl",
|
33 |
+
# "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off
|
34 |
+
# by default.
|
35 |
+
autocomplete: 'google'
|
36 |
+
# minimum characters to type before the autocompleter starts
|
37 |
+
autocomplete_min: 4
|
38 |
+
# Default search language - leave blank to detect from browser information or
|
39 |
+
# use codes from 'languages.py'
|
40 |
+
default_lang: 'auto'
|
41 |
+
# max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages
|
42 |
+
# Available languages
|
43 |
+
# languages:
|
44 |
+
# - all
|
45 |
+
# - en
|
46 |
+
# - en-US
|
47 |
+
# - de
|
48 |
+
# - it-IT
|
49 |
+
# - fr
|
50 |
+
# - fr-BE
|
51 |
+
# ban time in seconds after engine errors
|
52 |
+
ban_time_on_fail: 5
|
53 |
+
# max ban time in seconds after engine errors
|
54 |
+
max_ban_time_on_fail: 120
|
55 |
+
suspended_times:
|
56 |
+
# Engine suspension time after error (in seconds; set to 0 to disable)
|
57 |
+
# For error "Access denied" and "HTTP error [402, 403]"
|
58 |
+
SearxEngineAccessDenied: 86400
|
59 |
+
# For error "CAPTCHA"
|
60 |
+
SearxEngineCaptcha: 86400
|
61 |
+
# For error "Too many request" and "HTTP error 429"
|
62 |
+
SearxEngineTooManyRequests: 3600
|
63 |
+
# Cloudflare CAPTCHA
|
64 |
+
cf_SearxEngineCaptcha: 1296000
|
65 |
+
cf_SearxEngineAccessDenied: 86400
|
66 |
+
# ReCAPTCHA
|
67 |
+
recaptcha_SearxEngineCaptcha: 604800
|
68 |
+
|
69 |
+
# remove format to deny access, use lower case.
|
70 |
+
# formats: [html, csv, json, rss]
|
71 |
+
formats:
|
72 |
+
- html
|
73 |
+
- json
|
74 |
+
|
75 |
+
server:
|
76 |
+
# Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS}
|
77 |
+
port: 8888
|
78 |
+
bind_address: '127.0.0.1'
|
79 |
+
# public URL of the instance, to ensure correct inbound links. Is overwritten
|
80 |
+
# by ${SEARXNG_URL}.
|
81 |
+
base_url: / # "http://example.com/location"
|
82 |
+
limiter: false # rate limit the number of request on the instance, block some bots
|
83 |
+
public_instance: false # enable features designed only for public instances
|
84 |
+
|
85 |
+
# If your instance owns a /etc/searxng/settings.yml file, then set the following
|
86 |
+
# values there.
|
87 |
+
|
88 |
+
secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET}
|
89 |
+
# Proxying image results through searx
|
90 |
+
image_proxy: false
|
91 |
+
# 1.0 and 1.1 are supported
|
92 |
+
http_protocol_version: '1.0'
|
93 |
+
# POST queries are more secure as they don't show up in history but may cause
|
94 |
+
# problems when using Firefox containers
|
95 |
+
method: 'POST'
|
96 |
+
default_http_headers:
|
97 |
+
X-Content-Type-Options: nosniff
|
98 |
+
X-Download-Options: noopen
|
99 |
+
X-Robots-Tag: noindex, nofollow
|
100 |
+
Referrer-Policy: no-referrer
|
101 |
+
|
102 |
+
redis:
|
103 |
+
# URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}.
|
104 |
+
# https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis
|
105 |
+
url: false
|
106 |
+
|
107 |
+
ui:
|
108 |
+
# Custom static path - leave it blank if you didn't change
|
109 |
+
static_path: ''
|
110 |
+
static_use_hash: false
|
111 |
+
# Custom templates path - leave it blank if you didn't change
|
112 |
+
templates_path: ''
|
113 |
+
# query_in_title: When true, the result page's titles contains the query
|
114 |
+
# it decreases privacy, since the browser can record the page titles.
|
115 |
+
query_in_title: false
|
116 |
+
# infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page.
|
117 |
+
infinite_scroll: false
|
118 |
+
# ui theme
|
119 |
+
default_theme: simple
|
120 |
+
# center the results ?
|
121 |
+
center_alignment: false
|
122 |
+
# URL prefix of the internet archive, don't forget trailing slash (if needed).
|
123 |
+
# cache_url: "https://webcache.googleusercontent.com/search?q=cache:"
|
124 |
+
# Default interface locale - leave blank to detect from browser information or
|
125 |
+
# use codes from the 'locales' config section
|
126 |
+
default_locale: ''
|
127 |
+
# Open result links in a new tab by default
|
128 |
+
# results_on_new_tab: false
|
129 |
+
theme_args:
|
130 |
+
# style of simple theme: auto, light, dark
|
131 |
+
simple_style: auto
|
132 |
+
# Perform search immediately if a category is selected.
|
133 |
+
# Disable to select multiple categories at once and start the search manually.
|
134 |
+
search_on_category_select: true
|
135 |
+
# Hotkeys: default or vim
|
136 |
+
hotkeys: default
|
137 |
+
|
138 |
+
# Lock arbitrary settings on the preferences page. To find the ID of the user
|
139 |
+
# setting you want to lock, check the ID of the form on the page "preferences".
|
140 |
+
#
|
141 |
+
# preferences:
|
142 |
+
# lock:
|
143 |
+
# - language
|
144 |
+
# - autocomplete
|
145 |
+
# - method
|
146 |
+
# - query_in_title
|
147 |
+
|
148 |
+
# searx supports result proxification using an external service:
|
149 |
+
# https://github.com/asciimoo/morty uncomment below section if you have running
|
150 |
+
# morty proxy the key is base64 encoded (keep the !!binary notation)
|
151 |
+
# Note: since commit af77ec3, morty accepts a base64 encoded key.
|
152 |
+
#
|
153 |
+
# result_proxy:
|
154 |
+
# url: http://127.0.0.1:3000/
|
155 |
+
# # the key is a base64 encoded string, the YAML !!binary prefix is optional
|
156 |
+
# key: !!binary "your_morty_proxy_key"
|
157 |
+
# # [true|false] enable the "proxy" button next to each result
|
158 |
+
# proxify_results: true
|
159 |
+
|
160 |
+
# communication with search engines
|
161 |
+
#
|
162 |
+
outgoing:
|
163 |
+
# default timeout in seconds, can be overridden by engine
|
164 |
+
request_timeout: 3.0
|
165 |
+
# the maximum timeout in seconds
|
166 |
+
# max_request_timeout: 10.0
|
167 |
+
# suffix of searx_useragent, could contain information like an email address
|
168 |
+
# to the administrator
|
169 |
+
useragent_suffix: ''
|
170 |
+
# The maximum number of concurrent connections that may be established.
|
171 |
+
pool_connections: 100
|
172 |
+
# Allow the connection pool to maintain keep-alive connections below this
|
173 |
+
# point.
|
174 |
+
pool_maxsize: 20
|
175 |
+
# See https://www.python-httpx.org/http2/
|
176 |
+
enable_http2: true
|
177 |
+
# uncomment below section if you want to use a custom server certificate
|
178 |
+
# see https://www.python-httpx.org/advanced/#changing-the-verification-defaults
|
179 |
+
# and https://www.python-httpx.org/compatibility/#ssl-configuration
|
180 |
+
# verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
|
181 |
+
#
|
182 |
+
# uncomment below section if you want to use a proxy, see: SOCKS proxies
|
183 |
+
# https://2.python-requests.org/en/latest/user/advanced/#proxies
|
184 |
+
# are also supported: see
|
185 |
+
# https://2.python-requests.org/en/latest/user/advanced/#socks
|
186 |
+
#
|
187 |
+
# proxies:
|
188 |
+
# all://:
|
189 |
+
# - http://proxy1:8080
|
190 |
+
# - http://proxy2:8080
|
191 |
+
#
|
192 |
+
# using_tor_proxy: true
|
193 |
+
#
|
194 |
+
# Extra seconds to add in order to account for the time taken by the proxy
|
195 |
+
#
|
196 |
+
# extra_proxy_timeout: 10.0
|
197 |
+
#
|
198 |
+
# uncomment below section only if you have more than one network interface
|
199 |
+
# which can be the source of outgoing search requests
|
200 |
+
#
|
201 |
+
# source_ips:
|
202 |
+
# - 1.1.1.1
|
203 |
+
# - 1.1.1.2
|
204 |
+
# - fe80::/126
|
205 |
+
|
206 |
+
# External plugin configuration, for more details see
|
207 |
+
# https://docs.searxng.org/dev/plugins.html
|
208 |
+
#
|
209 |
+
# plugins:
|
210 |
+
# - plugin1
|
211 |
+
# - plugin2
|
212 |
+
# - ...
|
213 |
+
|
214 |
+
# Comment or un-comment plugin to activate / deactivate by default.
|
215 |
+
#
|
216 |
+
# enabled_plugins:
|
217 |
+
# # these plugins are enabled if nothing is configured ..
|
218 |
+
# - 'Hash plugin'
|
219 |
+
# - 'Self Information'
|
220 |
+
# - 'Tracker URL remover'
|
221 |
+
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
|
222 |
+
# # these plugins are disabled if nothing is configured ..
|
223 |
+
# - 'Hostname replace' # see hostname_replace configuration below
|
224 |
+
# - 'Open Access DOI rewrite'
|
225 |
+
# - 'Tor check plugin'
|
226 |
+
# # Read the docs before activate: auto-detection of the language could be
|
227 |
+
# # detrimental to users expectations / users can activate the plugin in the
|
228 |
+
# # preferences if they want.
|
229 |
+
# - 'Autodetect search language'
|
230 |
+
|
231 |
+
# Configuration of the "Hostname replace" plugin:
|
232 |
+
#
|
233 |
+
# hostname_replace:
|
234 |
+
# '(.*\.)?youtube\.com$': 'invidious.example.com'
|
235 |
+
# '(.*\.)?youtu\.be$': 'invidious.example.com'
|
236 |
+
# '(.*\.)?youtube-nocookie\.com$': 'yotter.example.com'
|
237 |
+
# '(.*\.)?reddit\.com$': 'teddit.example.com'
|
238 |
+
# '(.*\.)?redd\.it$': 'teddit.example.com'
|
239 |
+
# '(www\.)?twitter\.com$': 'nitter.example.com'
|
240 |
+
# # to remove matching host names from result list, set value to false
|
241 |
+
# 'spam\.example\.com': false
|
242 |
+
|
243 |
+
checker:
|
244 |
+
# disable checker when in debug mode
|
245 |
+
off_when_debug: true
|
246 |
+
|
247 |
+
# use "scheduling: false" to disable scheduling
|
248 |
+
# scheduling: interval or int
|
249 |
+
|
250 |
+
# to activate the scheduler:
|
251 |
+
# * uncomment "scheduling" section
|
252 |
+
# * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1"
|
253 |
+
# to your uwsgi.ini
|
254 |
+
|
255 |
+
# scheduling:
|
256 |
+
# start_after: [300, 1800] # delay to start the first run of the checker
|
257 |
+
# every: [86400, 90000] # how often the checker runs
|
258 |
+
|
259 |
+
# additional tests: only for the YAML anchors (see the engines section)
|
260 |
+
#
|
261 |
+
additional_tests:
|
262 |
+
rosebud: &test_rosebud
|
263 |
+
matrix:
|
264 |
+
query: rosebud
|
265 |
+
lang: en
|
266 |
+
result_container:
|
267 |
+
- not_empty
|
268 |
+
- ['one_title_contains', 'citizen kane']
|
269 |
+
test:
|
270 |
+
- unique_results
|
271 |
+
|
272 |
+
android: &test_android
|
273 |
+
matrix:
|
274 |
+
query: ['android']
|
275 |
+
lang: ['en', 'de', 'fr', 'zh-CN']
|
276 |
+
result_container:
|
277 |
+
- not_empty
|
278 |
+
- ['one_title_contains', 'google']
|
279 |
+
test:
|
280 |
+
- unique_results
|
281 |
+
|
282 |
+
# tests: only for the YAML anchors (see the engines section)
|
283 |
+
tests:
|
284 |
+
infobox: &tests_infobox
|
285 |
+
infobox:
|
286 |
+
matrix:
|
287 |
+
query: ['linux', 'new york', 'bbc']
|
288 |
+
result_container:
|
289 |
+
- has_infobox
|
290 |
+
|
291 |
+
categories_as_tabs:
|
292 |
+
general:
|
293 |
+
images:
|
294 |
+
videos:
|
295 |
+
news:
|
296 |
+
map:
|
297 |
+
music:
|
298 |
+
it:
|
299 |
+
science:
|
300 |
+
files:
|
301 |
+
social media:
|
302 |
+
|
303 |
+
engines:
|
304 |
+
- name: 9gag
|
305 |
+
engine: 9gag
|
306 |
+
shortcut: 9g
|
307 |
+
disabled: true
|
308 |
+
|
309 |
+
- name: annas archive
|
310 |
+
engine: annas_archive
|
311 |
+
disabled: true
|
312 |
+
shortcut: aa
|
313 |
+
|
314 |
+
# - name: annas articles
|
315 |
+
# engine: annas_archive
|
316 |
+
# shortcut: aaa
|
317 |
+
# # https://docs.searxng.org/dev/engines/online/annas_archive.html
|
318 |
+
# aa_content: 'journal_article' # book_any .. magazine, standards_document
|
319 |
+
# aa_ext: 'pdf' # pdf, epub, ..
|
320 |
+
# aa_sort: 'newest' # newest, oldest, largest, smallest
|
321 |
+
|
322 |
+
- name: apk mirror
|
323 |
+
engine: apkmirror
|
324 |
+
timeout: 4.0
|
325 |
+
shortcut: apkm
|
326 |
+
disabled: true
|
327 |
+
|
328 |
+
- name: apple app store
|
329 |
+
engine: apple_app_store
|
330 |
+
shortcut: aps
|
331 |
+
disabled: true
|
332 |
+
|
333 |
+
# Requires Tor
|
334 |
+
- name: ahmia
|
335 |
+
engine: ahmia
|
336 |
+
categories: onions
|
337 |
+
enable_http: true
|
338 |
+
shortcut: ah
|
339 |
+
|
340 |
+
- name: anaconda
|
341 |
+
engine: xpath
|
342 |
+
paging: true
|
343 |
+
first_page_num: 0
|
344 |
+
search_url: https://anaconda.org/search?q={query}&page={pageno}
|
345 |
+
results_xpath: //tbody/tr
|
346 |
+
url_xpath: ./td/h5/a[last()]/@href
|
347 |
+
title_xpath: ./td/h5
|
348 |
+
content_xpath: ./td[h5]/text()
|
349 |
+
categories: it
|
350 |
+
timeout: 6.0
|
351 |
+
shortcut: conda
|
352 |
+
disabled: true
|
353 |
+
|
354 |
+
- name: arch linux wiki
|
355 |
+
engine: archlinux
|
356 |
+
shortcut: al
|
357 |
+
|
358 |
+
- name: artic
|
359 |
+
engine: artic
|
360 |
+
shortcut: arc
|
361 |
+
timeout: 4.0
|
362 |
+
|
363 |
+
- name: arxiv
|
364 |
+
engine: arxiv
|
365 |
+
shortcut: arx
|
366 |
+
timeout: 4.0
|
367 |
+
|
368 |
+
- name: ask
|
369 |
+
engine: ask
|
370 |
+
shortcut: ask
|
371 |
+
disabled: true
|
372 |
+
|
373 |
+
# tmp suspended: dh key too small
|
374 |
+
# - name: base
|
375 |
+
# engine: base
|
376 |
+
# shortcut: bs
|
377 |
+
|
378 |
+
- name: bandcamp
|
379 |
+
engine: bandcamp
|
380 |
+
shortcut: bc
|
381 |
+
categories: music
|
382 |
+
|
383 |
+
- name: wikipedia
|
384 |
+
engine: wikipedia
|
385 |
+
shortcut: wp
|
386 |
+
# add "list" to the array to get results in the results list
|
387 |
+
display_type: ['infobox']
|
388 |
+
base_url: 'https://{language}.wikipedia.org/'
|
389 |
+
categories: [general]
|
390 |
+
|
391 |
+
- name: bilibili
|
392 |
+
engine: bilibili
|
393 |
+
shortcut: bil
|
394 |
+
disabled: true
|
395 |
+
|
396 |
+
- name: bing
|
397 |
+
engine: bing
|
398 |
+
shortcut: bi
|
399 |
+
disabled: true
|
400 |
+
|
401 |
+
- name: bing images
|
402 |
+
engine: bing_images
|
403 |
+
shortcut: bii
|
404 |
+
|
405 |
+
- name: bing news
|
406 |
+
engine: bing_news
|
407 |
+
shortcut: bin
|
408 |
+
|
409 |
+
- name: bing videos
|
410 |
+
engine: bing_videos
|
411 |
+
shortcut: biv
|
412 |
+
|
413 |
+
- name: bitbucket
|
414 |
+
engine: xpath
|
415 |
+
paging: true
|
416 |
+
search_url: https://bitbucket.org/repo/all/{pageno}?name={query}
|
417 |
+
url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href
|
418 |
+
title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]
|
419 |
+
content_xpath: //article[@class="repo-summary"]/p
|
420 |
+
categories: [it, repos]
|
421 |
+
timeout: 4.0
|
422 |
+
disabled: true
|
423 |
+
shortcut: bb
|
424 |
+
about:
|
425 |
+
website: https://bitbucket.org/
|
426 |
+
wikidata_id: Q2493781
|
427 |
+
official_api_documentation: https://developer.atlassian.com/bitbucket
|
428 |
+
use_official_api: false
|
429 |
+
require_api_key: false
|
430 |
+
results: HTML
|
431 |
+
|
432 |
+
- name: bpb
|
433 |
+
engine: bpb
|
434 |
+
shortcut: bpb
|
435 |
+
disabled: true
|
436 |
+
|
437 |
+
- name: btdigg
|
438 |
+
engine: btdigg
|
439 |
+
shortcut: bt
|
440 |
+
disabled: true
|
441 |
+
|
442 |
+
- name: ccc-tv
|
443 |
+
engine: xpath
|
444 |
+
paging: false
|
445 |
+
search_url: https://media.ccc.de/search/?q={query}
|
446 |
+
url_xpath: //div[@class="caption"]/h3/a/@href
|
447 |
+
title_xpath: //div[@class="caption"]/h3/a/text()
|
448 |
+
content_xpath: //div[@class="caption"]/h4/@title
|
449 |
+
categories: videos
|
450 |
+
disabled: true
|
451 |
+
shortcut: c3tv
|
452 |
+
about:
|
453 |
+
website: https://media.ccc.de/
|
454 |
+
wikidata_id: Q80729951
|
455 |
+
official_api_documentation: https://github.com/voc/voctoweb
|
456 |
+
use_official_api: false
|
457 |
+
require_api_key: false
|
458 |
+
results: HTML
|
459 |
+
# We don't set language: de here because media.ccc.de is not just
|
460 |
+
# for a German audience. It contains many English videos and many
|
461 |
+
# German videos have English subtitles.
|
462 |
+
|
463 |
+
- name: openverse
|
464 |
+
engine: openverse
|
465 |
+
categories: images
|
466 |
+
shortcut: opv
|
467 |
+
|
468 |
+
- name: chefkoch
|
469 |
+
engine: chefkoch
|
470 |
+
shortcut: chef
|
471 |
+
# to show premium or plus results too:
|
472 |
+
# skip_premium: false
|
473 |
+
|
474 |
+
# - name: core.ac.uk
|
475 |
+
# engine: core
|
476 |
+
# categories: science
|
477 |
+
# shortcut: cor
|
478 |
+
# # get your API key from: https://core.ac.uk/api-keys/register/
|
479 |
+
# api_key: 'unset'
|
480 |
+
|
481 |
+
- name: crossref
|
482 |
+
engine: crossref
|
483 |
+
shortcut: cr
|
484 |
+
timeout: 30
|
485 |
+
disabled: true
|
486 |
+
|
487 |
+
- name: crowdview
|
488 |
+
engine: json_engine
|
489 |
+
shortcut: cv
|
490 |
+
categories: general
|
491 |
+
paging: false
|
492 |
+
search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query}
|
493 |
+
results_query: results
|
494 |
+
url_query: link
|
495 |
+
title_query: title
|
496 |
+
content_query: snippet
|
497 |
+
disabled: true
|
498 |
+
about:
|
499 |
+
website: https://crowdview.ai/
|
500 |
+
|
501 |
+
- name: yep
|
502 |
+
engine: yep
|
503 |
+
shortcut: yep
|
504 |
+
categories: general
|
505 |
+
search_type: web
|
506 |
+
disabled: true
|
507 |
+
|
508 |
+
- name: yep images
|
509 |
+
engine: yep
|
510 |
+
shortcut: yepi
|
511 |
+
categories: images
|
512 |
+
search_type: images
|
513 |
+
disabled: true
|
514 |
+
|
515 |
+
- name: yep news
|
516 |
+
engine: yep
|
517 |
+
shortcut: yepn
|
518 |
+
categories: news
|
519 |
+
search_type: news
|
520 |
+
disabled: true
|
521 |
+
|
522 |
+
- name: curlie
|
523 |
+
engine: xpath
|
524 |
+
shortcut: cl
|
525 |
+
categories: general
|
526 |
+
disabled: true
|
527 |
+
paging: true
|
528 |
+
lang_all: ''
|
529 |
+
search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189
|
530 |
+
page_size: 20
|
531 |
+
results_xpath: //div[@id="site-list-content"]/div[@class="site-item"]
|
532 |
+
url_xpath: ./div[@class="title-and-desc"]/a/@href
|
533 |
+
title_xpath: ./div[@class="title-and-desc"]/a/div
|
534 |
+
content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"]
|
535 |
+
about:
|
536 |
+
website: https://curlie.org/
|
537 |
+
wikidata_id: Q60715723
|
538 |
+
use_official_api: false
|
539 |
+
require_api_key: false
|
540 |
+
results: HTML
|
541 |
+
|
542 |
+
- name: currency
|
543 |
+
engine: currency_convert
|
544 |
+
categories: general
|
545 |
+
shortcut: cc
|
546 |
+
|
547 |
+
- name: bahnhof
|
548 |
+
engine: json_engine
|
549 |
+
search_url: https://www.bahnhof.de/api/stations/search/{query}
|
550 |
+
url_prefix: https://www.bahnhof.de/
|
551 |
+
url_query: slug
|
552 |
+
title_query: name
|
553 |
+
content_query: state
|
554 |
+
shortcut: bf
|
555 |
+
disabled: true
|
556 |
+
about:
|
557 |
+
website: https://www.bahn.de
|
558 |
+
wikidata_id: Q22811603
|
559 |
+
use_official_api: false
|
560 |
+
require_api_key: false
|
561 |
+
results: JSON
|
562 |
+
language: de
|
563 |
+
|
564 |
+
- name: deezer
|
565 |
+
engine: deezer
|
566 |
+
shortcut: dz
|
567 |
+
disabled: true
|
568 |
+
|
569 |
+
- name: destatis
|
570 |
+
engine: destatis
|
571 |
+
shortcut: destat
|
572 |
+
disabled: true
|
573 |
+
|
574 |
+
- name: deviantart
|
575 |
+
engine: deviantart
|
576 |
+
shortcut: da
|
577 |
+
timeout: 3.0
|
578 |
+
|
579 |
+
- name: ddg definitions
|
580 |
+
engine: duckduckgo_definitions
|
581 |
+
shortcut: ddd
|
582 |
+
weight: 2
|
583 |
+
disabled: true
|
584 |
+
tests: *tests_infobox
|
585 |
+
|
586 |
+
# cloudflare protected
|
587 |
+
# - name: digbt
|
588 |
+
# engine: digbt
|
589 |
+
# shortcut: dbt
|
590 |
+
# timeout: 6.0
|
591 |
+
# disabled: true
|
592 |
+
|
593 |
+
- name: docker hub
|
594 |
+
engine: docker_hub
|
595 |
+
shortcut: dh
|
596 |
+
categories: [it, packages]
|
597 |
+
|
598 |
+
- name: erowid
|
599 |
+
engine: xpath
|
600 |
+
paging: true
|
601 |
+
first_page_num: 0
|
602 |
+
page_size: 30
|
603 |
+
search_url: https://www.erowid.org/search.php?q={query}&s={pageno}
|
604 |
+
url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href
|
605 |
+
title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text()
|
606 |
+
content_xpath: //dl[@class="results-list"]/dd[@class="result-details"]
|
607 |
+
categories: []
|
608 |
+
shortcut: ew
|
609 |
+
disabled: true
|
610 |
+
about:
|
611 |
+
website: https://www.erowid.org/
|
612 |
+
wikidata_id: Q1430691
|
613 |
+
official_api_documentation:
|
614 |
+
use_official_api: false
|
615 |
+
require_api_key: false
|
616 |
+
results: HTML
|
617 |
+
|
618 |
+
# - name: elasticsearch
|
619 |
+
# shortcut: es
|
620 |
+
# engine: elasticsearch
|
621 |
+
# base_url: http://localhost:9200
|
622 |
+
# username: elastic
|
623 |
+
# password: changeme
|
624 |
+
# index: my-index
|
625 |
+
# # available options: match, simple_query_string, term, terms, custom
|
626 |
+
# query_type: match
|
627 |
+
# # if query_type is set to custom, provide your query here
|
628 |
+
# #custom_query_json: {"query":{"match_all": {}}}
|
629 |
+
# #show_metadata: false
|
630 |
+
# disabled: true
|
631 |
+
|
632 |
+
- name: wikidata
|
633 |
+
engine: wikidata
|
634 |
+
shortcut: wd
|
635 |
+
timeout: 3.0
|
636 |
+
weight: 2
|
637 |
+
# add "list" to the array to get results in the results list
|
638 |
+
display_type: ['infobox']
|
639 |
+
tests: *tests_infobox
|
640 |
+
categories: [general]
|
641 |
+
|
642 |
+
- name: duckduckgo
|
643 |
+
engine: duckduckgo
|
644 |
+
shortcut: ddg
|
645 |
+
|
646 |
+
- name: duckduckgo images
|
647 |
+
engine: duckduckgo_extra
|
648 |
+
categories: [images, web]
|
649 |
+
ddg_category: images
|
650 |
+
shortcut: ddi
|
651 |
+
disabled: true
|
652 |
+
|
653 |
+
- name: duckduckgo videos
|
654 |
+
engine: duckduckgo_extra
|
655 |
+
categories: [videos, web]
|
656 |
+
ddg_category: videos
|
657 |
+
shortcut: ddv
|
658 |
+
disabled: true
|
659 |
+
|
660 |
+
- name: duckduckgo news
|
661 |
+
engine: duckduckgo_extra
|
662 |
+
categories: [news, web]
|
663 |
+
ddg_category: news
|
664 |
+
shortcut: ddn
|
665 |
+
disabled: true
|
666 |
+
|
667 |
+
- name: duckduckgo weather
|
668 |
+
engine: duckduckgo_weather
|
669 |
+
shortcut: ddw
|
670 |
+
disabled: true
|
671 |
+
|
672 |
+
- name: apple maps
|
673 |
+
engine: apple_maps
|
674 |
+
shortcut: apm
|
675 |
+
disabled: true
|
676 |
+
timeout: 5.0
|
677 |
+
|
678 |
+
- name: emojipedia
|
679 |
+
engine: emojipedia
|
680 |
+
timeout: 4.0
|
681 |
+
shortcut: em
|
682 |
+
disabled: true
|
683 |
+
|
684 |
+
- name: tineye
|
685 |
+
engine: tineye
|
686 |
+
shortcut: tin
|
687 |
+
timeout: 9.0
|
688 |
+
disabled: true
|
689 |
+
|
690 |
+
- name: etymonline
|
691 |
+
engine: xpath
|
692 |
+
paging: true
|
693 |
+
search_url: https://etymonline.com/search?page={pageno}&q={query}
|
694 |
+
url_xpath: //a[contains(@class, "word__name--")]/@href
|
695 |
+
title_xpath: //a[contains(@class, "word__name--")]
|
696 |
+
content_xpath: //section[contains(@class, "word__defination")]
|
697 |
+
first_page_num: 1
|
698 |
+
shortcut: et
|
699 |
+
categories: [dictionaries]
|
700 |
+
about:
|
701 |
+
website: https://www.etymonline.com/
|
702 |
+
wikidata_id: Q1188617
|
703 |
+
official_api_documentation:
|
704 |
+
use_official_api: false
|
705 |
+
require_api_key: false
|
706 |
+
results: HTML
|
707 |
+
|
708 |
+
# - name: ebay
|
709 |
+
# engine: ebay
|
710 |
+
# shortcut: eb
|
711 |
+
# base_url: 'https://www.ebay.com'
|
712 |
+
# disabled: true
|
713 |
+
# timeout: 5
|
714 |
+
|
715 |
+
- name: 1x
|
716 |
+
engine: www1x
|
717 |
+
shortcut: 1x
|
718 |
+
timeout: 3.0
|
719 |
+
disabled: true
|
720 |
+
|
721 |
+
- name: fdroid
|
722 |
+
engine: fdroid
|
723 |
+
shortcut: fd
|
724 |
+
disabled: true
|
725 |
+
|
726 |
+
- name: flickr
|
727 |
+
categories: images
|
728 |
+
shortcut: fl
|
729 |
+
# You can use the engine using the official stable API, but you need an API
|
730 |
+
# key, see: https://www.flickr.com/services/apps/create/
|
731 |
+
# engine: flickr
|
732 |
+
# api_key: 'apikey' # required!
|
733 |
+
# Or you can use the html non-stable engine, activated by default
|
734 |
+
engine: flickr_noapi
|
735 |
+
|
736 |
+
- name: free software directory
|
737 |
+
engine: mediawiki
|
738 |
+
shortcut: fsd
|
739 |
+
categories: [it, software wikis]
|
740 |
+
base_url: https://directory.fsf.org/
|
741 |
+
search_type: title
|
742 |
+
timeout: 5.0
|
743 |
+
disabled: true
|
744 |
+
about:
|
745 |
+
website: https://directory.fsf.org/
|
746 |
+
wikidata_id: Q2470288
|
747 |
+
|
748 |
+
# - name: freesound
|
749 |
+
# engine: freesound
|
750 |
+
# shortcut: fnd
|
751 |
+
# disabled: true
|
752 |
+
# timeout: 15.0
|
753 |
+
# API key required, see: https://freesound.org/docs/api/overview.html
|
754 |
+
# api_key: MyAPIkey
|
755 |
+
|
756 |
+
- name: frinkiac
|
757 |
+
engine: frinkiac
|
758 |
+
shortcut: frk
|
759 |
+
disabled: true
|
760 |
+
|
761 |
+
- name: fyyd
|
762 |
+
engine: fyyd
|
763 |
+
shortcut: fy
|
764 |
+
timeout: 8.0
|
765 |
+
disabled: true
|
766 |
+
|
767 |
+
- name: genius
|
768 |
+
engine: genius
|
769 |
+
shortcut: gen
|
770 |
+
|
771 |
+
- name: gentoo
|
772 |
+
engine: gentoo
|
773 |
+
shortcut: ge
|
774 |
+
timeout: 10.0
|
775 |
+
|
776 |
+
- name: gitlab
|
777 |
+
engine: json_engine
|
778 |
+
paging: true
|
779 |
+
search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno}
|
780 |
+
url_query: web_url
|
781 |
+
title_query: name_with_namespace
|
782 |
+
content_query: description
|
783 |
+
page_size: 20
|
784 |
+
categories: [it, repos]
|
785 |
+
shortcut: gl
|
786 |
+
timeout: 10.0
|
787 |
+
disabled: true
|
788 |
+
about:
|
789 |
+
website: https://about.gitlab.com/
|
790 |
+
wikidata_id: Q16639197
|
791 |
+
official_api_documentation: https://docs.gitlab.com/ee/api/
|
792 |
+
use_official_api: false
|
793 |
+
require_api_key: false
|
794 |
+
results: JSON
|
795 |
+
|
796 |
+
- name: github
|
797 |
+
engine: github
|
798 |
+
shortcut: gh
|
799 |
+
|
800 |
+
# This a Gitea service. If you would like to use a different instance,
|
801 |
+
# change codeberg.org to URL of the desired Gitea host. Or you can create a
|
802 |
+
# new engine by copying this and changing the name, shortcut and search_url.
|
803 |
+
|
804 |
+
- name: codeberg
|
805 |
+
engine: json_engine
|
806 |
+
search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10
|
807 |
+
url_query: html_url
|
808 |
+
title_query: name
|
809 |
+
content_query: description
|
810 |
+
categories: [it, repos]
|
811 |
+
shortcut: cb
|
812 |
+
disabled: true
|
813 |
+
about:
|
814 |
+
website: https://codeberg.org/
|
815 |
+
wikidata_id:
|
816 |
+
official_api_documentation: https://try.gitea.io/api/swagger
|
817 |
+
use_official_api: false
|
818 |
+
require_api_key: false
|
819 |
+
results: JSON
|
820 |
+
|
821 |
+
- name: goodreads
|
822 |
+
engine: goodreads
|
823 |
+
shortcut: good
|
824 |
+
timeout: 4.0
|
825 |
+
disabled: true
|
826 |
+
|
827 |
+
- name: google
|
828 |
+
engine: google
|
829 |
+
shortcut: go
|
830 |
+
# additional_tests:
|
831 |
+
# android: *test_android
|
832 |
+
|
833 |
+
- name: google images
|
834 |
+
engine: google_images
|
835 |
+
shortcut: goi
|
836 |
+
# additional_tests:
|
837 |
+
# android: *test_android
|
838 |
+
# dali:
|
839 |
+
# matrix:
|
840 |
+
# query: ['Dali Christ']
|
841 |
+
# lang: ['en', 'de', 'fr', 'zh-CN']
|
842 |
+
# result_container:
|
843 |
+
# - ['one_title_contains', 'Salvador']
|
844 |
+
|
845 |
+
- name: google news
|
846 |
+
engine: google_news
|
847 |
+
shortcut: gon
|
848 |
+
# additional_tests:
|
849 |
+
# android: *test_android
|
850 |
+
|
851 |
+
- name: google videos
|
852 |
+
engine: google_videos
|
853 |
+
shortcut: gov
|
854 |
+
# additional_tests:
|
855 |
+
# android: *test_android
|
856 |
+
|
857 |
+
- name: google scholar
|
858 |
+
engine: google_scholar
|
859 |
+
shortcut: gos
|
860 |
+
|
861 |
+
- name: google play apps
|
862 |
+
engine: google_play
|
863 |
+
categories: [files, apps]
|
864 |
+
shortcut: gpa
|
865 |
+
play_categ: apps
|
866 |
+
disabled: true
|
867 |
+
|
868 |
+
- name: google play movies
|
869 |
+
engine: google_play
|
870 |
+
categories: videos
|
871 |
+
shortcut: gpm
|
872 |
+
play_categ: movies
|
873 |
+
disabled: true
|
874 |
+
|
875 |
+
- name: material icons
|
876 |
+
engine: material_icons
|
877 |
+
categories: images
|
878 |
+
shortcut: mi
|
879 |
+
disabled: true
|
880 |
+
|
881 |
+
- name: gpodder
|
882 |
+
engine: json_engine
|
883 |
+
shortcut: gpod
|
884 |
+
timeout: 4.0
|
885 |
+
paging: false
|
886 |
+
search_url: https://gpodder.net/search.json?q={query}
|
887 |
+
url_query: url
|
888 |
+
title_query: title
|
889 |
+
content_query: description
|
890 |
+
page_size: 19
|
891 |
+
categories: music
|
892 |
+
disabled: true
|
893 |
+
about:
|
894 |
+
website: https://gpodder.net
|
895 |
+
wikidata_id: Q3093354
|
896 |
+
official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/
|
897 |
+
use_official_api: false
|
898 |
+
requires_api_key: false
|
899 |
+
results: JSON
|
900 |
+
|
901 |
+
- name: habrahabr
|
902 |
+
engine: xpath
|
903 |
+
paging: true
|
904 |
+
search_url: https://habr.com/en/search/page{pageno}/?q={query}
|
905 |
+
results_xpath: //article[contains(@class, "tm-articles-list__item")]
|
906 |
+
url_xpath: .//a[@class="tm-title__link"]/@href
|
907 |
+
title_xpath: .//a[@class="tm-title__link"]
|
908 |
+
content_xpath: .//div[contains(@class, "article-formatted-body")]
|
909 |
+
categories: it
|
910 |
+
timeout: 4.0
|
911 |
+
disabled: true
|
912 |
+
shortcut: habr
|
913 |
+
about:
|
914 |
+
website: https://habr.com/
|
915 |
+
wikidata_id: Q4494434
|
916 |
+
official_api_documentation: https://habr.com/en/docs/help/api/
|
917 |
+
use_official_api: false
|
918 |
+
require_api_key: false
|
919 |
+
results: HTML
|
920 |
+
|
921 |
+
- name: hackernews
|
922 |
+
engine: hackernews
|
923 |
+
shortcut: hn
|
924 |
+
disabled: true
|
925 |
+
|
926 |
+
- name: hoogle
|
927 |
+
engine: xpath
|
928 |
+
paging: true
|
929 |
+
search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno}
|
930 |
+
results_xpath: '//div[@class="result"]'
|
931 |
+
title_xpath: './/div[@class="ans"]//a'
|
932 |
+
url_xpath: './/div[@class="ans"]//a/@href'
|
933 |
+
content_xpath: './/div[@class="from"]'
|
934 |
+
page_size: 20
|
935 |
+
categories: [it, packages]
|
936 |
+
shortcut: ho
|
937 |
+
about:
|
938 |
+
website: https://hoogle.haskell.org/
|
939 |
+
wikidata_id: Q34010
|
940 |
+
official_api_documentation: https://hackage.haskell.org/api
|
941 |
+
use_official_api: false
|
942 |
+
require_api_key: false
|
943 |
+
results: JSON
|
944 |
+
|
945 |
+
- name: imdb
|
946 |
+
engine: imdb
|
947 |
+
shortcut: imdb
|
948 |
+
timeout: 6.0
|
949 |
+
disabled: true
|
950 |
+
|
951 |
+
- name: imgur
|
952 |
+
engine: imgur
|
953 |
+
shortcut: img
|
954 |
+
disabled: true
|
955 |
+
|
956 |
+
- name: ina
|
957 |
+
engine: ina
|
958 |
+
shortcut: in
|
959 |
+
timeout: 6.0
|
960 |
+
disabled: true
|
961 |
+
|
962 |
+
- name: invidious
|
963 |
+
engine: invidious
|
964 |
+
    # Instances will be selected randomly, see https://api.invidious.io/ for
|
965 |
+
# instances that are stable (good uptime) and close to you.
|
966 |
+
base_url:
|
967 |
+
- https://invidious.io.lol
|
968 |
+
- https://invidious.fdn.fr
|
969 |
+
- https://yt.artemislena.eu
|
970 |
+
- https://invidious.tiekoetter.com
|
971 |
+
- https://invidious.flokinet.to
|
972 |
+
- https://vid.puffyan.us
|
973 |
+
- https://invidious.privacydev.net
|
974 |
+
- https://inv.tux.pizza
|
975 |
+
shortcut: iv
|
976 |
+
timeout: 3.0
|
977 |
+
disabled: true
|
978 |
+
|
979 |
+
- name: jisho
|
980 |
+
engine: jisho
|
981 |
+
shortcut: js
|
982 |
+
timeout: 3.0
|
983 |
+
disabled: true
|
984 |
+
|
985 |
+
- name: kickass
|
986 |
+
engine: kickass
|
987 |
+
base_url:
|
988 |
+
- https://kickasstorrents.to
|
989 |
+
- https://kickasstorrents.cr
|
990 |
+
- https://kickasstorrent.cr
|
991 |
+
- https://kickass.sx
|
992 |
+
- https://kat.am
|
993 |
+
shortcut: kc
|
994 |
+
timeout: 4.0
|
995 |
+
|
996 |
+
- name: lemmy communities
|
997 |
+
engine: lemmy
|
998 |
+
lemmy_type: Communities
|
999 |
+
shortcut: leco
|
1000 |
+
|
1001 |
+
- name: lemmy users
|
1002 |
+
engine: lemmy
|
1003 |
+
network: lemmy communities
|
1004 |
+
lemmy_type: Users
|
1005 |
+
shortcut: leus
|
1006 |
+
|
1007 |
+
- name: lemmy posts
|
1008 |
+
engine: lemmy
|
1009 |
+
network: lemmy communities
|
1010 |
+
lemmy_type: Posts
|
1011 |
+
shortcut: lepo
|
1012 |
+
|
1013 |
+
- name: lemmy comments
|
1014 |
+
engine: lemmy
|
1015 |
+
network: lemmy communities
|
1016 |
+
lemmy_type: Comments
|
1017 |
+
shortcut: lecom
|
1018 |
+
|
1019 |
+
- name: library genesis
|
1020 |
+
engine: xpath
|
1021 |
+
# search_url: https://libgen.is/search.php?req={query}
|
1022 |
+
search_url: https://libgen.rs/search.php?req={query}
|
1023 |
+
url_xpath: //a[contains(@href,"book/index.php?md5")]/@href
|
1024 |
+
title_xpath: //a[contains(@href,"book/")]/text()[1]
|
1025 |
+
content_xpath: //td/a[1][contains(@href,"=author")]/text()
|
1026 |
+
categories: files
|
1027 |
+
timeout: 7.0
|
1028 |
+
disabled: true
|
1029 |
+
shortcut: lg
|
1030 |
+
about:
|
1031 |
+
website: https://libgen.fun/
|
1032 |
+
wikidata_id: Q22017206
|
1033 |
+
official_api_documentation:
|
1034 |
+
use_official_api: false
|
1035 |
+
require_api_key: false
|
1036 |
+
results: HTML
|
1037 |
+
|
1038 |
+
- name: z-library
|
1039 |
+
engine: zlibrary
|
1040 |
+
shortcut: zlib
|
1041 |
+
categories: files
|
1042 |
+
timeout: 7.0
|
1043 |
+
|
1044 |
+
- name: library of congress
|
1045 |
+
engine: loc
|
1046 |
+
shortcut: loc
|
1047 |
+
categories: images
|
1048 |
+
|
1049 |
+
- name: lingva
|
1050 |
+
engine: lingva
|
1051 |
+
shortcut: lv
|
1052 |
+
# set lingva instance in url, by default it will use the official instance
|
1053 |
+
# url: https://lingva.thedaviddelta.com
|
1054 |
+
|
1055 |
+
- name: lobste.rs
|
1056 |
+
engine: xpath
|
1057 |
+
search_url: https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance
|
1058 |
+
results_xpath: //li[contains(@class, "story")]
|
1059 |
+
url_xpath: .//a[@class="u-url"]/@href
|
1060 |
+
title_xpath: .//a[@class="u-url"]
|
1061 |
+
content_xpath: .//a[@class="domain"]
|
1062 |
+
categories: it
|
1063 |
+
shortcut: lo
|
1064 |
+
timeout: 5.0
|
1065 |
+
disabled: true
|
1066 |
+
about:
|
1067 |
+
website: https://lobste.rs/
|
1068 |
+
wikidata_id: Q60762874
|
1069 |
+
official_api_documentation:
|
1070 |
+
use_official_api: false
|
1071 |
+
require_api_key: false
|
1072 |
+
results: HTML
|
1073 |
+
|
1074 |
+
- name: mastodon users
|
1075 |
+
engine: mastodon
|
1076 |
+
mastodon_type: accounts
|
1077 |
+
base_url: https://mastodon.social
|
1078 |
+
shortcut: mau
|
1079 |
+
|
1080 |
+
- name: mastodon hashtags
|
1081 |
+
engine: mastodon
|
1082 |
+
mastodon_type: hashtags
|
1083 |
+
base_url: https://mastodon.social
|
1084 |
+
shortcut: mah
|
1085 |
+
|
1086 |
+
# - name: matrixrooms
|
1087 |
+
# engine: mrs
|
1088 |
+
# # https://docs.searxng.org/dev/engines/online/mrs.html
|
1089 |
+
# # base_url: https://mrs-api-host
|
1090 |
+
# shortcut: mtrx
|
1091 |
+
# disabled: true
|
1092 |
+
|
1093 |
+
- name: mdn
|
1094 |
+
shortcut: mdn
|
1095 |
+
engine: json_engine
|
1096 |
+
categories: [it]
|
1097 |
+
paging: true
|
1098 |
+
search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno}
|
1099 |
+
results_query: documents
|
1100 |
+
url_query: mdn_url
|
1101 |
+
url_prefix: https://developer.mozilla.org
|
1102 |
+
title_query: title
|
1103 |
+
content_query: summary
|
1104 |
+
about:
|
1105 |
+
website: https://developer.mozilla.org
|
1106 |
+
wikidata_id: Q3273508
|
1107 |
+
official_api_documentation: null
|
1108 |
+
use_official_api: false
|
1109 |
+
require_api_key: false
|
1110 |
+
results: JSON
|
1111 |
+
|
1112 |
+
- name: metacpan
|
1113 |
+
engine: metacpan
|
1114 |
+
shortcut: cpan
|
1115 |
+
disabled: true
|
1116 |
+
number_of_results: 20
|
1117 |
+
|
1118 |
+
# - name: meilisearch
|
1119 |
+
# engine: meilisearch
|
1120 |
+
# shortcut: mes
|
1121 |
+
# enable_http: true
|
1122 |
+
# base_url: http://localhost:7700
|
1123 |
+
# index: my-index
|
1124 |
+
|
1125 |
+
- name: mixcloud
|
1126 |
+
engine: mixcloud
|
1127 |
+
shortcut: mc
|
1128 |
+
|
1129 |
+
# MongoDB engine
|
1130 |
+
# Required dependency: pymongo
|
1131 |
+
# - name: mymongo
|
1132 |
+
# engine: mongodb
|
1133 |
+
# shortcut: md
|
1134 |
+
# exact_match_only: false
|
1135 |
+
# host: '127.0.0.1'
|
1136 |
+
# port: 27017
|
1137 |
+
# enable_http: true
|
1138 |
+
# results_per_page: 20
|
1139 |
+
# database: 'business'
|
1140 |
+
# collection: 'reviews' # name of the db collection
|
1141 |
+
# key: 'name' # key in the collection to search for
|
1142 |
+
|
1143 |
+
- name: mozhi
|
1144 |
+
engine: mozhi
|
1145 |
+
base_url:
|
1146 |
+
- https://mozhi.aryak.me
|
1147 |
+
- https://translate.bus-hit.me
|
1148 |
+
- https://nyc1.mz.ggtyler.dev
|
1149 |
+
# mozhi_engine: google - see https://mozhi.aryak.me for supported engines
|
1150 |
+
timeout: 4.0
|
1151 |
+
shortcut: mz
|
1152 |
+
disabled: true
|
1153 |
+
|
1154 |
+
- name: mwmbl
|
1155 |
+
engine: mwmbl
|
1156 |
+
# api_url: https://api.mwmbl.org
|
1157 |
+
shortcut: mwm
|
1158 |
+
disabled: true
|
1159 |
+
|
1160 |
+
- name: npm
|
1161 |
+
engine: json_engine
|
1162 |
+
paging: true
|
1163 |
+
first_page_num: 0
|
1164 |
+
search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno}
|
1165 |
+
results_query: results
|
1166 |
+
url_query: package/links/npm
|
1167 |
+
title_query: package/name
|
1168 |
+
content_query: package/description
|
1169 |
+
page_size: 25
|
1170 |
+
categories: [it, packages]
|
1171 |
+
disabled: true
|
1172 |
+
timeout: 5.0
|
1173 |
+
shortcut: npm
|
1174 |
+
about:
|
1175 |
+
website: https://npms.io/
|
1176 |
+
wikidata_id: Q7067518
|
1177 |
+
official_api_documentation: https://api-docs.npms.io/
|
1178 |
+
use_official_api: false
|
1179 |
+
require_api_key: false
|
1180 |
+
results: JSON
|
1181 |
+
|
1182 |
+
- name: nyaa
|
1183 |
+
engine: nyaa
|
1184 |
+
shortcut: nt
|
1185 |
+
disabled: true
|
1186 |
+
|
1187 |
+
- name: mankier
|
1188 |
+
engine: json_engine
|
1189 |
+
search_url: https://www.mankier.com/api/v2/mans/?q={query}
|
1190 |
+
results_query: results
|
1191 |
+
url_query: url
|
1192 |
+
title_query: name
|
1193 |
+
content_query: description
|
1194 |
+
categories: it
|
1195 |
+
shortcut: man
|
1196 |
+
about:
|
1197 |
+
website: https://www.mankier.com/
|
1198 |
+
official_api_documentation: https://www.mankier.com/api
|
1199 |
+
use_official_api: true
|
1200 |
+
require_api_key: false
|
1201 |
+
results: JSON
|
1202 |
+
|
1203 |
+
- name: odysee
|
1204 |
+
engine: odysee
|
1205 |
+
shortcut: od
|
1206 |
+
disabled: true
|
1207 |
+
|
1208 |
+
- name: openairedatasets
|
1209 |
+
engine: json_engine
|
1210 |
+
paging: true
|
1211 |
+
search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
|
1212 |
+
results_query: response/results/result
|
1213 |
+
url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
|
1214 |
+
title_query: metadata/oaf:entity/oaf:result/title/$
|
1215 |
+
content_query: metadata/oaf:entity/oaf:result/description/$
|
1216 |
+
content_html_to_text: true
|
1217 |
+
categories: 'science'
|
1218 |
+
shortcut: oad
|
1219 |
+
timeout: 5.0
|
1220 |
+
about:
|
1221 |
+
website: https://www.openaire.eu/
|
1222 |
+
wikidata_id: Q25106053
|
1223 |
+
official_api_documentation: https://api.openaire.eu/
|
1224 |
+
use_official_api: false
|
1225 |
+
require_api_key: false
|
1226 |
+
results: JSON
|
1227 |
+
|
1228 |
+
- name: openairepublications
|
1229 |
+
engine: json_engine
|
1230 |
+
paging: true
|
1231 |
+
search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
|
1232 |
+
results_query: response/results/result
|
1233 |
+
url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
|
1234 |
+
title_query: metadata/oaf:entity/oaf:result/title/$
|
1235 |
+
content_query: metadata/oaf:entity/oaf:result/description/$
|
1236 |
+
content_html_to_text: true
|
1237 |
+
categories: science
|
1238 |
+
shortcut: oap
|
1239 |
+
timeout: 5.0
|
1240 |
+
about:
|
1241 |
+
website: https://www.openaire.eu/
|
1242 |
+
wikidata_id: Q25106053
|
1243 |
+
official_api_documentation: https://api.openaire.eu/
|
1244 |
+
use_official_api: false
|
1245 |
+
require_api_key: false
|
1246 |
+
results: JSON
|
1247 |
+
|
1248 |
+
# - name: opensemanticsearch
|
1249 |
+
# engine: opensemantic
|
1250 |
+
# shortcut: oss
|
1251 |
+
# base_url: 'http://localhost:8983/solr/opensemanticsearch/'
|
1252 |
+
|
1253 |
+
- name: openstreetmap
|
1254 |
+
engine: openstreetmap
|
1255 |
+
shortcut: osm
|
1256 |
+
|
1257 |
+
- name: openrepos
|
1258 |
+
engine: xpath
|
1259 |
+
paging: true
|
1260 |
+
search_url: https://openrepos.net/search/node/{query}?page={pageno}
|
1261 |
+
url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href
|
1262 |
+
title_xpath: //li[@class="search-result"]//h3[@class="title"]/a
|
1263 |
+
content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"]
|
1264 |
+
categories: files
|
1265 |
+
timeout: 4.0
|
1266 |
+
disabled: true
|
1267 |
+
shortcut: or
|
1268 |
+
about:
|
1269 |
+
website: https://openrepos.net/
|
1270 |
+
wikidata_id:
|
1271 |
+
official_api_documentation:
|
1272 |
+
use_official_api: false
|
1273 |
+
require_api_key: false
|
1274 |
+
results: HTML
|
1275 |
+
|
1276 |
+
- name: packagist
|
1277 |
+
engine: json_engine
|
1278 |
+
paging: true
|
1279 |
+
search_url: https://packagist.org/search.json?q={query}&page={pageno}
|
1280 |
+
results_query: results
|
1281 |
+
url_query: url
|
1282 |
+
title_query: name
|
1283 |
+
content_query: description
|
1284 |
+
categories: [it, packages]
|
1285 |
+
disabled: true
|
1286 |
+
timeout: 5.0
|
1287 |
+
shortcut: pack
|
1288 |
+
about:
|
1289 |
+
website: https://packagist.org
|
1290 |
+
wikidata_id: Q108311377
|
1291 |
+
official_api_documentation: https://packagist.org/apidoc
|
1292 |
+
use_official_api: true
|
1293 |
+
require_api_key: false
|
1294 |
+
results: JSON
|
1295 |
+
|
1296 |
+
- name: pdbe
|
1297 |
+
engine: pdbe
|
1298 |
+
shortcut: pdb
|
1299 |
+
# Hide obsolete PDB entries. Default is not to hide obsolete structures
|
1300 |
+
# hide_obsolete: false
|
1301 |
+
|
1302 |
+
- name: photon
|
1303 |
+
engine: photon
|
1304 |
+
shortcut: ph
|
1305 |
+
|
1306 |
+
- name: pinterest
|
1307 |
+
engine: pinterest
|
1308 |
+
shortcut: pin
|
1309 |
+
|
1310 |
+
- name: piped
|
1311 |
+
engine: piped
|
1312 |
+
shortcut: ppd
|
1313 |
+
categories: videos
|
1314 |
+
piped_filter: videos
|
1315 |
+
timeout: 3.0
|
1316 |
+
|
1317 |
+
# URL to use as link and for embeds
|
1318 |
+
frontend_url: https://srv.piped.video
|
1319 |
+
# Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/
|
1320 |
+
backend_url:
|
1321 |
+
- https://pipedapi.kavin.rocks
|
1322 |
+
- https://pipedapi-libre.kavin.rocks
|
1323 |
+
- https://pipedapi.adminforge.de
|
1324 |
+
|
1325 |
+
- name: piped.music
|
1326 |
+
engine: piped
|
1327 |
+
network: piped
|
1328 |
+
shortcut: ppdm
|
1329 |
+
categories: music
|
1330 |
+
piped_filter: music_songs
|
1331 |
+
timeout: 3.0
|
1332 |
+
|
1333 |
+
- name: piratebay
|
1334 |
+
engine: piratebay
|
1335 |
+
shortcut: tpb
|
1336 |
+
# You may need to change this URL to a proxy if piratebay is blocked in your
|
1337 |
+
# country
|
1338 |
+
url: https://thepiratebay.org/
|
1339 |
+
timeout: 3.0
|
1340 |
+
|
1341 |
+
- name: podcastindex
|
1342 |
+
engine: podcastindex
|
1343 |
+
shortcut: podcast
|
1344 |
+
|
1345 |
+
  # Required dependency: psycopg2
|
1346 |
+
# - name: postgresql
|
1347 |
+
# engine: postgresql
|
1348 |
+
# database: postgres
|
1349 |
+
# username: postgres
|
1350 |
+
# password: postgres
|
1351 |
+
# limit: 10
|
1352 |
+
# query_str: 'SELECT * from my_table WHERE my_column = %(query)s'
|
1353 |
+
# shortcut : psql
|
1354 |
+
|
1355 |
+
- name: presearch
|
1356 |
+
engine: presearch
|
1357 |
+
search_type: search
|
1358 |
+
categories: [general, web]
|
1359 |
+
shortcut: ps
|
1360 |
+
timeout: 4.0
|
1361 |
+
disabled: true
|
1362 |
+
|
1363 |
+
- name: presearch images
|
1364 |
+
engine: presearch
|
1365 |
+
network: presearch
|
1366 |
+
search_type: images
|
1367 |
+
categories: [images, web]
|
1368 |
+
timeout: 4.0
|
1369 |
+
shortcut: psimg
|
1370 |
+
disabled: true
|
1371 |
+
|
1372 |
+
- name: presearch videos
|
1373 |
+
engine: presearch
|
1374 |
+
network: presearch
|
1375 |
+
search_type: videos
|
1376 |
+
categories: [general, web]
|
1377 |
+
timeout: 4.0
|
1378 |
+
shortcut: psvid
|
1379 |
+
disabled: true
|
1380 |
+
|
1381 |
+
- name: presearch news
|
1382 |
+
engine: presearch
|
1383 |
+
network: presearch
|
1384 |
+
search_type: news
|
1385 |
+
categories: [news, web]
|
1386 |
+
timeout: 4.0
|
1387 |
+
shortcut: psnews
|
1388 |
+
disabled: true
|
1389 |
+
|
1390 |
+
- name: pub.dev
|
1391 |
+
engine: xpath
|
1392 |
+
shortcut: pd
|
1393 |
+
search_url: https://pub.dev/packages?q={query}&page={pageno}
|
1394 |
+
paging: true
|
1395 |
+
results_xpath: //div[contains(@class,"packages-item")]
|
1396 |
+
url_xpath: ./div/h3/a/@href
|
1397 |
+
title_xpath: ./div/h3/a
|
1398 |
+
content_xpath: ./div/div/div[contains(@class,"packages-description")]/span
|
1399 |
+
categories: [packages, it]
|
1400 |
+
timeout: 3.0
|
1401 |
+
disabled: true
|
1402 |
+
first_page_num: 1
|
1403 |
+
about:
|
1404 |
+
website: https://pub.dev/
|
1405 |
+
official_api_documentation: https://pub.dev/help/api
|
1406 |
+
use_official_api: false
|
1407 |
+
require_api_key: false
|
1408 |
+
results: HTML
|
1409 |
+
|
1410 |
+
- name: pubmed
|
1411 |
+
engine: pubmed
|
1412 |
+
shortcut: pub
|
1413 |
+
timeout: 3.0
|
1414 |
+
|
1415 |
+
- name: pypi
|
1416 |
+
shortcut: pypi
|
1417 |
+
engine: xpath
|
1418 |
+
paging: true
|
1419 |
+
search_url: https://pypi.org/search/?q={query}&page={pageno}
|
1420 |
+
results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]
|
1421 |
+
url_xpath: ./@href
|
1422 |
+
title_xpath: ./h3/span[@class="package-snippet__name"]
|
1423 |
+
content_xpath: ./p
|
1424 |
+
suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]
|
1425 |
+
first_page_num: 1
|
1426 |
+
categories: [it, packages]
|
1427 |
+
about:
|
1428 |
+
website: https://pypi.org
|
1429 |
+
wikidata_id: Q2984686
|
1430 |
+
official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html
|
1431 |
+
use_official_api: false
|
1432 |
+
require_api_key: false
|
1433 |
+
results: HTML
|
1434 |
+
|
1435 |
+
- name: qwant
|
1436 |
+
qwant_categ: web
|
1437 |
+
engine: qwant
|
1438 |
+
shortcut: qw
|
1439 |
+
categories: [general, web]
|
1440 |
+
additional_tests:
|
1441 |
+
rosebud: *test_rosebud
|
1442 |
+
|
1443 |
+
- name: qwant news
|
1444 |
+
qwant_categ: news
|
1445 |
+
engine: qwant
|
1446 |
+
shortcut: qwn
|
1447 |
+
categories: news
|
1448 |
+
network: qwant
|
1449 |
+
|
1450 |
+
- name: qwant images
|
1451 |
+
qwant_categ: images
|
1452 |
+
engine: qwant
|
1453 |
+
shortcut: qwi
|
1454 |
+
categories: [images, web]
|
1455 |
+
network: qwant
|
1456 |
+
|
1457 |
+
- name: qwant videos
|
1458 |
+
qwant_categ: videos
|
1459 |
+
engine: qwant
|
1460 |
+
shortcut: qwv
|
1461 |
+
categories: [videos, web]
|
1462 |
+
network: qwant
|
1463 |
+
|
1464 |
+
# - name: library
|
1465 |
+
# engine: recoll
|
1466 |
+
# shortcut: lib
|
1467 |
+
# base_url: 'https://recoll.example.org/'
|
1468 |
+
# search_dir: ''
|
1469 |
+
# mount_prefix: /export
|
1470 |
+
# dl_prefix: 'https://download.example.org'
|
1471 |
+
# timeout: 30.0
|
1472 |
+
# categories: files
|
1473 |
+
# disabled: true
|
1474 |
+
|
1475 |
+
# - name: recoll library reference
|
1476 |
+
# engine: recoll
|
1477 |
+
# base_url: 'https://recoll.example.org/'
|
1478 |
+
# search_dir: reference
|
1479 |
+
# mount_prefix: /export
|
1480 |
+
# dl_prefix: 'https://download.example.org'
|
1481 |
+
# shortcut: libr
|
1482 |
+
# timeout: 30.0
|
1483 |
+
# categories: files
|
1484 |
+
# disabled: true
|
1485 |
+
|
1486 |
+
- name: radio browser
|
1487 |
+
engine: radio_browser
|
1488 |
+
shortcut: rb
|
1489 |
+
|
1490 |
+
- name: reddit
|
1491 |
+
engine: reddit
|
1492 |
+
shortcut: re
|
1493 |
+
page_size: 25
|
1494 |
+
|
1495 |
+
- name: rottentomatoes
|
1496 |
+
engine: rottentomatoes
|
1497 |
+
shortcut: rt
|
1498 |
+
disabled: true
|
1499 |
+
|
1500 |
+
# Required dependency: redis
|
1501 |
+
# - name: myredis
|
1502 |
+
# shortcut : rds
|
1503 |
+
# engine: redis_server
|
1504 |
+
# exact_match_only: false
|
1505 |
+
# host: '127.0.0.1'
|
1506 |
+
# port: 6379
|
1507 |
+
# enable_http: true
|
1508 |
+
# password: ''
|
1509 |
+
# db: 0
|
1510 |
+
|
1511 |
+
# tmp suspended: bad certificate
|
1512 |
+
# - name: scanr structures
|
1513 |
+
# shortcut: scs
|
1514 |
+
# engine: scanr_structures
|
1515 |
+
# disabled: true
|
1516 |
+
|
1517 |
+
- name: sepiasearch
|
1518 |
+
engine: sepiasearch
|
1519 |
+
shortcut: sep
|
1520 |
+
|
1521 |
+
- name: soundcloud
|
1522 |
+
engine: soundcloud
|
1523 |
+
shortcut: sc
|
1524 |
+
|
1525 |
+
- name: stackoverflow
|
1526 |
+
engine: stackexchange
|
1527 |
+
shortcut: st
|
1528 |
+
api_site: 'stackoverflow'
|
1529 |
+
categories: [it, q&a]
|
1530 |
+
|
1531 |
+
- name: askubuntu
|
1532 |
+
engine: stackexchange
|
1533 |
+
shortcut: ubuntu
|
1534 |
+
api_site: 'askubuntu'
|
1535 |
+
categories: [it, q&a]
|
1536 |
+
|
1537 |
+
- name: internetarchivescholar
|
1538 |
+
engine: internet_archive_scholar
|
1539 |
+
shortcut: ias
|
1540 |
+
timeout: 5.0
|
1541 |
+
|
1542 |
+
- name: superuser
|
1543 |
+
engine: stackexchange
|
1544 |
+
shortcut: su
|
1545 |
+
api_site: 'superuser'
|
1546 |
+
categories: [it, q&a]
|
1547 |
+
|
1548 |
+
- name: searchcode code
|
1549 |
+
engine: searchcode_code
|
1550 |
+
shortcut: scc
|
1551 |
+
disabled: true
|
1552 |
+
|
1553 |
+
# - name: searx
|
1554 |
+
# engine: searx_engine
|
1555 |
+
# shortcut: se
|
1556 |
+
# instance_urls :
|
1557 |
+
# - http://127.0.0.1:8888/
|
1558 |
+
# - ...
|
1559 |
+
# disabled: true
|
1560 |
+
|
1561 |
+
- name: semantic scholar
|
1562 |
+
engine: semantic_scholar
|
1563 |
+
disabled: true
|
1564 |
+
shortcut: se
|
1565 |
+
|
1566 |
+
# Spotify needs API credentials
|
1567 |
+
# - name: spotify
|
1568 |
+
# engine: spotify
|
1569 |
+
# shortcut: stf
|
1570 |
+
# api_client_id: *******
|
1571 |
+
# api_client_secret: *******
|
1572 |
+
|
1573 |
+
# - name: solr
|
1574 |
+
# engine: solr
|
1575 |
+
# shortcut: slr
|
1576 |
+
# base_url: http://localhost:8983
|
1577 |
+
# collection: collection_name
|
1578 |
+
# sort: '' # sorting: asc or desc
|
1579 |
+
# field_list: '' # comma separated list of field names to display on the UI
|
1580 |
+
# default_fields: '' # default field to query
|
1581 |
+
# query_fields: '' # query fields
|
1582 |
+
# enable_http: true
|
1583 |
+
|
1584 |
+
# - name: springer nature
|
1585 |
+
# engine: springer
|
1586 |
+
# # get your API key from: https://dev.springernature.com/signup
|
1587 |
+
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
|
1588 |
+
# api_key: 'unset'
|
1589 |
+
# shortcut: springer
|
1590 |
+
# timeout: 15.0
|
1591 |
+
|
1592 |
+
- name: startpage
|
1593 |
+
engine: startpage
|
1594 |
+
shortcut: sp
|
1595 |
+
timeout: 6.0
|
1596 |
+
disabled: true
|
1597 |
+
additional_tests:
|
1598 |
+
rosebud: *test_rosebud
|
1599 |
+
|
1600 |
+
- name: tokyotoshokan
|
1601 |
+
engine: tokyotoshokan
|
1602 |
+
shortcut: tt
|
1603 |
+
timeout: 6.0
|
1604 |
+
disabled: true
|
1605 |
+
|
1606 |
+
- name: solidtorrents
|
1607 |
+
engine: solidtorrents
|
1608 |
+
shortcut: solid
|
1609 |
+
timeout: 4.0
|
1610 |
+
base_url:
|
1611 |
+
- https://solidtorrents.to
|
1612 |
+
- https://bitsearch.to
|
1613 |
+
|
1614 |
+
# For this demo of the sqlite engine download:
|
1615 |
+
# https://liste.mediathekview.de/filmliste-v2.db.bz2
|
1616 |
+
# and unpack into searx/data/filmliste-v2.db
|
1617 |
+
# Query to test: "!demo concert"
|
1618 |
+
#
|
1619 |
+
# - name: demo
|
1620 |
+
# engine: sqlite
|
1621 |
+
# shortcut: demo
|
1622 |
+
# categories: general
|
1623 |
+
# result_template: default.html
|
1624 |
+
# database: searx/data/filmliste-v2.db
|
1625 |
+
# query_str: >-
|
1626 |
+
# SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title,
|
1627 |
+
# COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url,
|
1628 |
+
# description AS content
|
1629 |
+
# FROM film
|
1630 |
+
# WHERE title LIKE :wildcard OR description LIKE :wildcard
|
1631 |
+
# ORDER BY duration DESC
|
1632 |
+
|
1633 |
+
- name: tagesschau
|
1634 |
+
engine: tagesschau
|
1635 |
+
# when set to false, display URLs from Tagesschau, and not the actual source
|
1636 |
+
# (e.g. NDR, WDR, SWR, HR, ...)
|
1637 |
+
use_source_url: true
|
1638 |
+
shortcut: ts
|
1639 |
+
disabled: true
|
1640 |
+
|
1641 |
+
- name: tmdb
|
1642 |
+
engine: xpath
|
1643 |
+
paging: true
|
1644 |
+
categories: movies
|
1645 |
+
search_url: https://www.themoviedb.org/search?page={pageno}&query={query}
|
1646 |
+
results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")]
|
1647 |
+
url_xpath: .//div[contains(@class,"poster")]/a/@href
|
1648 |
+
thumbnail_xpath: .//img/@src
|
1649 |
+
title_xpath: .//div[contains(@class,"title")]//h2
|
1650 |
+
content_xpath: .//div[contains(@class,"overview")]
|
1651 |
+
shortcut: tm
|
1652 |
+
disabled: true
|
1653 |
+
|
1654 |
+
# Requires Tor
|
1655 |
+
- name: torch
|
1656 |
+
engine: xpath
|
1657 |
+
paging: true
|
1658 |
+
search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and
|
1659 |
+
results_xpath: //table//tr
|
1660 |
+
url_xpath: ./td[2]/a
|
1661 |
+
title_xpath: ./td[2]/b
|
1662 |
+
content_xpath: ./td[2]/small
|
1663 |
+
categories: onions
|
1664 |
+
enable_http: true
|
1665 |
+
shortcut: tch
|
1666 |
+
|
1667 |
+
# torznab engine lets you query any torznab compatible indexer. Using this
|
1668 |
+
# engine in combination with Jackett opens the possibility to query a lot of
|
1669 |
+
# public and private indexers directly from SearXNG. More details at:
|
1670 |
+
# https://docs.searxng.org/dev/engines/online/torznab.html
|
1671 |
+
#
|
1672 |
+
# - name: Torznab EZTV
|
1673 |
+
# engine: torznab
|
1674 |
+
# shortcut: eztv
|
1675 |
+
# base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab
|
1676 |
+
# enable_http: true # if using localhost
|
1677 |
+
# api_key: xxxxxxxxxxxxxxx
|
1678 |
+
# show_magnet_links: true
|
1679 |
+
# show_torrent_files: false
|
1680 |
+
# # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
|
1681 |
+
# torznab_categories: # optional
|
1682 |
+
# - 2000
|
1683 |
+
# - 5000
|
1684 |
+
|
1685 |
+
# tmp suspended - too slow, too many errors
|
1686 |
+
# - name: urbandictionary
|
1687 |
+
# engine : xpath
|
1688 |
+
# search_url : https://www.urbandictionary.com/define.php?term={query}
|
1689 |
+
# url_xpath : //*[@class="word"]/@href
|
1690 |
+
# title_xpath : //*[@class="def-header"]
|
1691 |
+
# content_xpath: //*[@class="meaning"]
|
1692 |
+
# shortcut: ud
|
1693 |
+
|
1694 |
+
- name: unsplash
|
1695 |
+
engine: unsplash
|
1696 |
+
shortcut: us
|
1697 |
+
|
1698 |
+
- name: yandex music
|
1699 |
+
engine: yandex_music
|
1700 |
+
shortcut: ydm
|
1701 |
+
disabled: true
|
1702 |
+
# https://yandex.com/support/music/access.html
|
1703 |
+
inactive: true
|
1704 |
+
|
1705 |
+
- name: yahoo
|
1706 |
+
engine: yahoo
|
1707 |
+
shortcut: yh
|
1708 |
+
disabled: true
|
1709 |
+
|
1710 |
+
- name: yahoo news
|
1711 |
+
engine: yahoo_news
|
1712 |
+
shortcut: yhn
|
1713 |
+
|
1714 |
+
- name: youtube
|
1715 |
+
shortcut: yt
|
1716 |
+
# You can use the engine using the official stable API, but you need an API
|
1717 |
+
# key See: https://console.developers.google.com/project
|
1718 |
+
#
|
1719 |
+
# engine: youtube_api
|
1720 |
+
# api_key: 'apikey' # required!
|
1721 |
+
#
|
1722 |
+
# Or you can use the html non-stable engine, activated by default
|
1723 |
+
engine: youtube_noapi
|
1724 |
+
|
1725 |
+
- name: dailymotion
|
1726 |
+
engine: dailymotion
|
1727 |
+
shortcut: dm
|
1728 |
+
|
1729 |
+
- name: vimeo
|
1730 |
+
engine: vimeo
|
1731 |
+
shortcut: vm
|
1732 |
+
|
1733 |
+
- name: wiby
|
1734 |
+
engine: json_engine
|
1735 |
+
paging: true
|
1736 |
+
search_url: https://wiby.me/json/?q={query}&p={pageno}
|
1737 |
+
url_query: URL
|
1738 |
+
title_query: Title
|
1739 |
+
content_query: Snippet
|
1740 |
+
categories: [general, web]
|
1741 |
+
shortcut: wib
|
1742 |
+
disabled: true
|
1743 |
+
about:
|
1744 |
+
website: https://wiby.me/
|
1745 |
+
|
1746 |
+
- name: alexandria
|
1747 |
+
engine: json_engine
|
1748 |
+
shortcut: alx
|
1749 |
+
categories: general
|
1750 |
+
paging: true
|
1751 |
+
search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno}
|
1752 |
+
results_query: results
|
1753 |
+
title_query: title
|
1754 |
+
url_query: url
|
1755 |
+
content_query: snippet
|
1756 |
+
timeout: 1.5
|
1757 |
+
disabled: true
|
1758 |
+
about:
|
1759 |
+
website: https://alexandria.org/
|
1760 |
+
official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md
|
1761 |
+
use_official_api: true
|
1762 |
+
require_api_key: false
|
1763 |
+
results: JSON
|
1764 |
+
|
1765 |
+
- name: wikibooks
|
1766 |
+
engine: mediawiki
|
1767 |
+
weight: 0.5
|
1768 |
+
shortcut: wb
|
1769 |
+
categories: [general, wikimedia]
|
1770 |
+
base_url: 'https://{language}.wikibooks.org/'
|
1771 |
+
search_type: text
|
1772 |
+
disabled: true
|
1773 |
+
about:
|
1774 |
+
website: https://www.wikibooks.org/
|
1775 |
+
wikidata_id: Q367
|
1776 |
+
|
1777 |
+
- name: wikinews
|
1778 |
+
engine: mediawiki
|
1779 |
+
shortcut: wn
|
1780 |
+
categories: [news, wikimedia]
|
1781 |
+
base_url: 'https://{language}.wikinews.org/'
|
1782 |
+
search_type: text
|
1783 |
+
srsort: create_timestamp_desc
|
1784 |
+
about:
|
1785 |
+
website: https://www.wikinews.org/
|
1786 |
+
wikidata_id: Q964
|
1787 |
+
|
1788 |
+
- name: wikiquote
|
1789 |
+
engine: mediawiki
|
1790 |
+
weight: 0.5
|
1791 |
+
shortcut: wq
|
1792 |
+
categories: [general, wikimedia]
|
1793 |
+
base_url: 'https://{language}.wikiquote.org/'
|
1794 |
+
search_type: text
|
1795 |
+
disabled: true
|
1796 |
+
additional_tests:
|
1797 |
+
rosebud: *test_rosebud
|
1798 |
+
about:
|
1799 |
+
website: https://www.wikiquote.org/
|
1800 |
+
wikidata_id: Q369
|
1801 |
+
|
1802 |
+
- name: wikisource
|
1803 |
+
engine: mediawiki
|
1804 |
+
weight: 0.5
|
1805 |
+
shortcut: ws
|
1806 |
+
categories: [general, wikimedia]
|
1807 |
+
base_url: 'https://{language}.wikisource.org/'
|
1808 |
+
search_type: text
|
1809 |
+
disabled: true
|
1810 |
+
about:
|
1811 |
+
website: https://www.wikisource.org/
|
1812 |
+
wikidata_id: Q263
|
1813 |
+
|
1814 |
+
- name: wikispecies
|
1815 |
+
engine: mediawiki
|
1816 |
+
shortcut: wsp
|
1817 |
+
categories: [general, science, wikimedia]
|
1818 |
+
base_url: 'https://species.wikimedia.org/'
|
1819 |
+
search_type: text
|
1820 |
+
disabled: true
|
1821 |
+
about:
|
1822 |
+
website: https://species.wikimedia.org/
|
1823 |
+
wikidata_id: Q13679
|
1824 |
+
|
1825 |
+
- name: wiktionary
|
1826 |
+
engine: mediawiki
|
1827 |
+
shortcut: wt
|
1828 |
+
categories: [dictionaries, wikimedia]
|
1829 |
+
base_url: 'https://{language}.wiktionary.org/'
|
1830 |
+
search_type: text
|
1831 |
+
about:
|
1832 |
+
website: https://www.wiktionary.org/
|
1833 |
+
wikidata_id: Q151
|
1834 |
+
|
1835 |
+
- name: wikiversity
|
1836 |
+
engine: mediawiki
|
1837 |
+
weight: 0.5
|
1838 |
+
shortcut: wv
|
1839 |
+
categories: [general, wikimedia]
|
1840 |
+
base_url: 'https://{language}.wikiversity.org/'
|
1841 |
+
search_type: text
|
1842 |
+
disabled: true
|
1843 |
+
about:
|
1844 |
+
website: https://www.wikiversity.org/
|
1845 |
+
wikidata_id: Q370
|
1846 |
+
|
1847 |
+
- name: wikivoyage
|
1848 |
+
engine: mediawiki
|
1849 |
+
weight: 0.5
|
1850 |
+
shortcut: wy
|
1851 |
+
categories: [general, wikimedia]
|
1852 |
+
base_url: 'https://{language}.wikivoyage.org/'
|
1853 |
+
search_type: text
|
1854 |
+
disabled: true
|
1855 |
+
about:
|
1856 |
+
website: https://www.wikivoyage.org/
|
1857 |
+
wikidata_id: Q373
|
1858 |
+
|
1859 |
+
- name: wikicommons.images
|
1860 |
+
engine: wikicommons
|
1861 |
+
shortcut: wc
|
1862 |
+
categories: images
|
1863 |
+
number_of_results: 10
|
1864 |
+
|
1865 |
+
- name: wolframalpha
|
1866 |
+
shortcut: wa
|
1867 |
+
# You can use the engine using the official stable API, but you need an API
|
1868 |
+
# key. See: https://products.wolframalpha.com/api/
|
1869 |
+
#
|
1870 |
+
# engine: wolframalpha_api
|
1871 |
+
# api_key: ''
|
1872 |
+
#
|
1873 |
+
# Or you can use the html non-stable engine, activated by default
|
1874 |
+
engine: wolframalpha_noapi
|
1875 |
+
timeout: 6.0
|
1876 |
+
categories: general
|
1877 |
+
disabled: false
|
1878 |
+
|
1879 |
+
- name: dictzone
|
1880 |
+
engine: dictzone
|
1881 |
+
shortcut: dc
|
1882 |
+
|
1883 |
+
- name: mymemory translated
|
1884 |
+
engine: translated
|
1885 |
+
shortcut: tl
|
1886 |
+
timeout: 5.0
|
1887 |
+
# You can use it without an API key, but you are limited to 1000 words/day
|
1888 |
+
# See: https://mymemory.translated.net/doc/usagelimits.php
|
1889 |
+
# api_key: ''
|
1890 |
+
|
1891 |
+
# Required dependency: mysql-connector-python
|
1892 |
+
# - name: mysql
|
1893 |
+
# engine: mysql_server
|
1894 |
+
# database: mydatabase
|
1895 |
+
# username: user
|
1896 |
+
# password: pass
|
1897 |
+
# limit: 10
|
1898 |
+
# query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
|
1899 |
+
# shortcut: mysql
|
1900 |
+
|
1901 |
+
- name: 1337x
|
1902 |
+
engine: 1337x
|
1903 |
+
shortcut: 1337x
|
1904 |
+
disabled: true
|
1905 |
+
|
1906 |
+
- name: duden
|
1907 |
+
engine: duden
|
1908 |
+
shortcut: du
|
1909 |
+
disabled: true
|
1910 |
+
|
1911 |
+
- name: seznam
|
1912 |
+
shortcut: szn
|
1913 |
+
engine: seznam
|
1914 |
+
disabled: true
|
1915 |
+
|
1916 |
+
# - name: deepl
|
1917 |
+
# engine: deepl
|
1918 |
+
# shortcut: dpl
|
1919 |
+
# # You can use the engine using the official stable API, but you need an API key
|
1920 |
+
# # See: https://www.deepl.com/pro-api?cta=header-pro-api
|
1921 |
+
# api_key: '' # required!
|
1922 |
+
# timeout: 5.0
|
1923 |
+
# disabled: true
|
1924 |
+
|
1925 |
+
- name: mojeek
|
1926 |
+
shortcut: mjk
|
1927 |
+
engine: xpath
|
1928 |
+
paging: true
|
1929 |
+
categories: [general, web]
|
1930 |
+
search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang}
|
1931 |
+
results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"]
|
1932 |
+
url_xpath: ./@href
|
1933 |
+
title_xpath: ../h2/a
|
1934 |
+
content_xpath: ..//p[@class="s"]
|
1935 |
+
suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a
|
1936 |
+
first_page_num: 0
|
1937 |
+
page_size: 10
|
1938 |
+
max_page: 100
|
1939 |
+
disabled: true
|
1940 |
+
about:
|
1941 |
+
website: https://www.mojeek.com/
|
1942 |
+
wikidata_id: Q60747299
|
1943 |
+
official_api_documentation: https://www.mojeek.com/services/api.html/
|
1944 |
+
use_official_api: false
|
1945 |
+
require_api_key: false
|
1946 |
+
results: HTML
|
1947 |
+
|
1948 |
+
- name: moviepilot
|
1949 |
+
engine: moviepilot
|
1950 |
+
shortcut: mp
|
1951 |
+
disabled: true
|
1952 |
+
|
1953 |
+
- name: naver
|
1954 |
+
shortcut: nvr
|
1955 |
+
categories: [general, web]
|
1956 |
+
engine: xpath
|
1957 |
+
paging: true
|
1958 |
+
search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno}
|
1959 |
+
url_xpath: //a[@class="link_tit"]/@href
|
1960 |
+
title_xpath: //a[@class="link_tit"]
|
1961 |
+
content_xpath: //a[@class="total_dsc"]/div
|
1962 |
+
first_page_num: 1
|
1963 |
+
page_size: 10
|
1964 |
+
disabled: true
|
1965 |
+
about:
|
1966 |
+
website: https://www.naver.com/
|
1967 |
+
wikidata_id: Q485639
|
1968 |
+
official_api_documentation: https://developers.naver.com/docs/nmt/examples/
|
1969 |
+
use_official_api: false
|
1970 |
+
require_api_key: false
|
1971 |
+
results: HTML
|
1972 |
+
language: ko
|
1973 |
+
|
1974 |
+
- name: rubygems
|
1975 |
+
shortcut: rbg
|
1976 |
+
engine: xpath
|
1977 |
+
paging: true
|
1978 |
+
search_url: https://rubygems.org/search?page={pageno}&query={query}
|
1979 |
+
results_xpath: /html/body/main/div/a[@class="gems__gem"]
|
1980 |
+
url_xpath: ./@href
|
1981 |
+
title_xpath: ./span/h2
|
1982 |
+
content_xpath: ./span/p
|
1983 |
+
suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a
|
1984 |
+
first_page_num: 1
|
1985 |
+
categories: [it, packages]
|
1986 |
+
disabled: true
|
1987 |
+
about:
|
1988 |
+
website: https://rubygems.org/
|
1989 |
+
wikidata_id: Q1853420
|
1990 |
+
official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
|
1991 |
+
use_official_api: false
|
1992 |
+
require_api_key: false
|
1993 |
+
results: HTML
|
1994 |
+
|
1995 |
+
- name: peertube
|
1996 |
+
engine: peertube
|
1997 |
+
shortcut: ptb
|
1998 |
+
paging: true
|
1999 |
+
# alternatives see: https://instances.joinpeertube.org/instances
|
2000 |
+
# base_url: https://tube.4aem.com
|
2001 |
+
categories: videos
|
2002 |
+
disabled: true
|
2003 |
+
timeout: 6.0
|
2004 |
+
|
2005 |
+
- name: mediathekviewweb
|
2006 |
+
engine: mediathekviewweb
|
2007 |
+
shortcut: mvw
|
2008 |
+
disabled: true
|
2009 |
+
|
2010 |
+
- name: yacy
|
2011 |
+
engine: yacy
|
2012 |
+
categories: general
|
2013 |
+
search_type: text
|
2014 |
+
base_url: https://yacy.searchlab.eu
|
2015 |
+
shortcut: ya
|
2016 |
+
disabled: true
|
2017 |
+
# required if you aren't using HTTPS for your local yacy instance
|
2018 |
+
# https://docs.searxng.org/dev/engines/online/yacy.html
|
2019 |
+
# enable_http: true
|
2020 |
+
# timeout: 3.0
|
2021 |
+
# search_mode: 'global'
|
2022 |
+
|
2023 |
+
- name: yacy images
|
2024 |
+
engine: yacy
|
2025 |
+
categories: images
|
2026 |
+
search_type: image
|
2027 |
+
base_url: https://yacy.searchlab.eu
|
2028 |
+
shortcut: yai
|
2029 |
+
disabled: true
|
2030 |
+
|
2031 |
+
- name: rumble
|
2032 |
+
engine: rumble
|
2033 |
+
shortcut: ru
|
2034 |
+
base_url: https://rumble.com/
|
2035 |
+
paging: true
|
2036 |
+
categories: videos
|
2037 |
+
disabled: true
|
2038 |
+
|
2039 |
+
- name: livespace
|
2040 |
+
engine: livespace
|
2041 |
+
shortcut: ls
|
2042 |
+
categories: videos
|
2043 |
+
disabled: true
|
2044 |
+
timeout: 5.0
|
2045 |
+
|
2046 |
+
- name: wordnik
|
2047 |
+
engine: wordnik
|
2048 |
+
shortcut: def
|
2049 |
+
base_url: https://www.wordnik.com/
|
2050 |
+
categories: [dictionaries]
|
2051 |
+
timeout: 5.0
|
2052 |
+
|
2053 |
+
- name: woxikon.de synonyme
|
2054 |
+
engine: xpath
|
2055 |
+
shortcut: woxi
|
2056 |
+
categories: [dictionaries]
|
2057 |
+
timeout: 5.0
|
2058 |
+
disabled: true
|
2059 |
+
search_url: https://synonyme.woxikon.de/synonyme/{query}.php
|
2060 |
+
url_xpath: //div[@class="upper-synonyms"]/a/@href
|
2061 |
+
content_xpath: //div[@class="synonyms-list-group"]
|
2062 |
+
title_xpath: //div[@class="upper-synonyms"]/a
|
2063 |
+
no_result_for_http_status: [404]
|
2064 |
+
about:
|
2065 |
+
website: https://www.woxikon.de/
|
2066 |
+
wikidata_id: # No Wikidata ID
|
2067 |
+
use_official_api: false
|
2068 |
+
require_api_key: false
|
2069 |
+
results: HTML
|
2070 |
+
language: de
|
2071 |
+
|
2072 |
+
- name: seekr news
|
2073 |
+
engine: seekr
|
2074 |
+
shortcut: senews
|
2075 |
+
categories: news
|
2076 |
+
seekr_category: news
|
2077 |
+
disabled: true
|
2078 |
+
|
2079 |
+
- name: seekr images
|
2080 |
+
engine: seekr
|
2081 |
+
network: seekr news
|
2082 |
+
shortcut: seimg
|
2083 |
+
categories: images
|
2084 |
+
seekr_category: images
|
2085 |
+
disabled: true
|
2086 |
+
|
2087 |
+
- name: seekr videos
|
2088 |
+
engine: seekr
|
2089 |
+
network: seekr news
|
2090 |
+
shortcut: sevid
|
2091 |
+
categories: videos
|
2092 |
+
seekr_category: videos
|
2093 |
+
disabled: true
|
2094 |
+
|
2095 |
+
- name: sjp.pwn
|
2096 |
+
engine: sjp
|
2097 |
+
shortcut: sjp
|
2098 |
+
base_url: https://sjp.pwn.pl/
|
2099 |
+
timeout: 5.0
|
2100 |
+
disabled: true
|
2101 |
+
|
2102 |
+
- name: stract
|
2103 |
+
engine: stract
|
2104 |
+
shortcut: str
|
2105 |
+
disabled: true
|
2106 |
+
|
2107 |
+
- name: svgrepo
|
2108 |
+
engine: svgrepo
|
2109 |
+
shortcut: svg
|
2110 |
+
timeout: 10.0
|
2111 |
+
disabled: true
|
2112 |
+
|
2113 |
+
- name: tootfinder
|
2114 |
+
engine: tootfinder
|
2115 |
+
shortcut: toot
|
2116 |
+
|
2117 |
+
- name: wallhaven
|
2118 |
+
engine: wallhaven
|
2119 |
+
# api_key: abcdefghijklmnopqrstuvwxyz
|
2120 |
+
shortcut: wh
|
2121 |
+
|
2122 |
+
# wikimini: online encyclopedia for children
|
2123 |
+
# The fulltext and title parameter is necessary for Wikimini because
|
2124 |
+
# sometimes it will not show the results and redirect instead
|
2125 |
+
- name: wikimini
|
2126 |
+
engine: xpath
|
2127 |
+
shortcut: wkmn
|
2128 |
+
search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search
|
2129 |
+
url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href
|
2130 |
+
title_xpath: //li//div[@class="mw-search-result-heading"]/a
|
2131 |
+
content_xpath: //li/div[@class="searchresult"]
|
2132 |
+
categories: general
|
2133 |
+
disabled: true
|
2134 |
+
about:
|
2135 |
+
website: https://wikimini.org/
|
2136 |
+
wikidata_id: Q3568032
|
2137 |
+
use_official_api: false
|
2138 |
+
require_api_key: false
|
2139 |
+
results: HTML
|
2140 |
+
language: fr
|
2141 |
+
|
2142 |
+
- name: wttr.in
|
2143 |
+
engine: wttr
|
2144 |
+
shortcut: wttr
|
2145 |
+
timeout: 9.0
|
2146 |
+
|
2147 |
+
- name: yummly
|
2148 |
+
engine: yummly
|
2149 |
+
shortcut: yum
|
2150 |
+
disabled: true
|
2151 |
+
|
2152 |
+
- name: brave
|
2153 |
+
engine: brave
|
2154 |
+
shortcut: br
|
2155 |
+
time_range_support: true
|
2156 |
+
paging: true
|
2157 |
+
categories: [general, web]
|
2158 |
+
brave_category: search
|
2159 |
+
# brave_spellcheck: true
|
2160 |
+
|
2161 |
+
- name: brave.images
|
2162 |
+
engine: brave
|
2163 |
+
network: brave
|
2164 |
+
shortcut: brimg
|
2165 |
+
categories: [images, web]
|
2166 |
+
brave_category: images
|
2167 |
+
|
2168 |
+
- name: brave.videos
|
2169 |
+
engine: brave
|
2170 |
+
network: brave
|
2171 |
+
shortcut: brvid
|
2172 |
+
categories: [videos, web]
|
2173 |
+
brave_category: videos
|
2174 |
+
|
2175 |
+
- name: brave.news
|
2176 |
+
engine: brave
|
2177 |
+
network: brave
|
2178 |
+
shortcut: brnews
|
2179 |
+
categories: news
|
2180 |
+
brave_category: news
|
2181 |
+
|
2182 |
+
# - name: brave.goggles
|
2183 |
+
# engine: brave
|
2184 |
+
# network: brave
|
2185 |
+
# shortcut: brgog
|
2186 |
+
# time_range_support: true
|
2187 |
+
# paging: true
|
2188 |
+
# categories: [general, web]
|
2189 |
+
# brave_category: goggles
|
2190 |
+
# Goggles: # required! This should be a URL ending in .goggle
|
2191 |
+
|
2192 |
+
- name: lib.rs
|
2193 |
+
shortcut: lrs
|
2194 |
+
engine: xpath
|
2195 |
+
search_url: https://lib.rs/search?q={query}
|
2196 |
+
results_xpath: /html/body/main/div/ol/li/a
|
2197 |
+
url_xpath: ./@href
|
2198 |
+
title_xpath: ./div[@class="h"]/h4
|
2199 |
+
content_xpath: ./div[@class="h"]/p
|
2200 |
+
categories: [it, packages]
|
2201 |
+
disabled: true
|
2202 |
+
about:
|
2203 |
+
website: https://lib.rs
|
2204 |
+
wikidata_id: Q113486010
|
2205 |
+
use_official_api: false
|
2206 |
+
require_api_key: false
|
2207 |
+
results: HTML
|
2208 |
+
|
2209 |
+
- name: sourcehut
|
2210 |
+
shortcut: srht
|
2211 |
+
engine: xpath
|
2212 |
+
paging: true
|
2213 |
+
search_url: https://sr.ht/projects?page={pageno}&search={query}
|
2214 |
+
results_xpath: (//div[@class="event-list"])[1]/div[@class="event"]
|
2215 |
+
url_xpath: ./h4/a[2]/@href
|
2216 |
+
title_xpath: ./h4/a[2]
|
2217 |
+
content_xpath: ./p
|
2218 |
+
first_page_num: 1
|
2219 |
+
categories: [it, repos]
|
2220 |
+
disabled: true
|
2221 |
+
about:
|
2222 |
+
website: https://sr.ht
|
2223 |
+
wikidata_id: Q78514485
|
2224 |
+
official_api_documentation: https://man.sr.ht/
|
2225 |
+
use_official_api: false
|
2226 |
+
require_api_key: false
|
2227 |
+
results: HTML
|
2228 |
+
|
2229 |
+
- name: goo
|
2230 |
+
shortcut: goo
|
2231 |
+
engine: xpath
|
2232 |
+
paging: true
|
2233 |
+
search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0
|
2234 |
+
url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href
|
2235 |
+
title_xpath: //div[@class="result"]/p[@class='title fsL1']/a
|
2236 |
+
content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p
|
2237 |
+
first_page_num: 0
|
2238 |
+
categories: [general, web]
|
2239 |
+
disabled: true
|
2240 |
+
timeout: 4.0
|
2241 |
+
about:
|
2242 |
+
website: https://search.goo.ne.jp
|
2243 |
+
wikidata_id: Q249044
|
2244 |
+
use_official_api: false
|
2245 |
+
require_api_key: false
|
2246 |
+
results: HTML
|
2247 |
+
language: ja
|
2248 |
+
|
2249 |
+
- name: bt4g
|
2250 |
+
engine: bt4g
|
2251 |
+
shortcut: bt4g
|
2252 |
+
|
2253 |
+
- name: pkg.go.dev
|
2254 |
+
engine: xpath
|
2255 |
+
shortcut: pgo
|
2256 |
+
search_url: https://pkg.go.dev/search?limit=100&m=package&q={query}
|
2257 |
+
results_xpath: /html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"]
|
2258 |
+
url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href
|
2259 |
+
title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a
|
2260 |
+
content_xpath: ./p[@class="SearchSnippet-synopsis"]
|
2261 |
+
categories: [packages, it]
|
2262 |
+
timeout: 3.0
|
2263 |
+
disabled: true
|
2264 |
+
about:
|
2265 |
+
website: https://pkg.go.dev/
|
2266 |
+
use_official_api: false
|
2267 |
+
require_api_key: false
|
2268 |
+
results: HTML
|
2269 |
+
|
2270 |
+
# Doku engine lets you access to any Doku wiki instance:
|
2271 |
+
# A public one or a private/corporate one.
|
2272 |
+
# - name: ubuntuwiki
|
2273 |
+
# engine: doku
|
2274 |
+
# shortcut: uw
|
2275 |
+
# base_url: 'https://doc.ubuntu-fr.org'
|
2276 |
+
|
2277 |
+
# Be careful when enabling this engine if you are
|
2278 |
+
# running a public instance. Do not expose any sensitive
|
2279 |
+
# information. You can restrict access by configuring a list
|
2280 |
+
# of access tokens under tokens.
|
2281 |
+
# - name: git grep
|
2282 |
+
# engine: command
|
2283 |
+
# command: ['git', 'grep', '{{QUERY}}']
|
2284 |
+
# shortcut: gg
|
2285 |
+
# tokens: []
|
2286 |
+
# disabled: true
|
2287 |
+
# delimiter:
|
2288 |
+
# chars: ':'
|
2289 |
+
# keys: ['filepath', 'code']
|
2290 |
+
|
2291 |
+
# Be careful when enabling this engine if you are
|
2292 |
+
# running a public instance. Do not expose any sensitive
|
2293 |
+
# information. You can restrict access by configuring a list
|
2294 |
+
# of access tokens under tokens.
|
2295 |
+
# - name: locate
|
2296 |
+
# engine: command
|
2297 |
+
# command: ['locate', '{{QUERY}}']
|
2298 |
+
# shortcut: loc
|
2299 |
+
# tokens: []
|
2300 |
+
# disabled: true
|
2301 |
+
# delimiter:
|
2302 |
+
# chars: ' '
|
2303 |
+
# keys: ['line']
|
2304 |
+
|
2305 |
+
# Be careful when enabling this engine if you are
|
2306 |
+
# running a public instance. Do not expose any sensitive
|
2307 |
+
# information. You can restrict access by configuring a list
|
2308 |
+
# of access tokens under tokens.
|
2309 |
+
# - name: find
|
2310 |
+
# engine: command
|
2311 |
+
# command: ['find', '.', '-name', '{{QUERY}}']
|
2312 |
+
# query_type: path
|
2313 |
+
# shortcut: fnd
|
2314 |
+
# tokens: []
|
2315 |
+
# disabled: true
|
2316 |
+
# delimiter:
|
2317 |
+
# chars: ' '
|
2318 |
+
# keys: ['line']
|
2319 |
+
|
2320 |
+
# Be careful when enabling this engine if you are
|
2321 |
+
# running a public instance. Do not expose any sensitive
|
2322 |
+
# information. You can restrict access by configuring a list
|
2323 |
+
# of access tokens under tokens.
|
2324 |
+
# - name: pattern search in files
|
2325 |
+
# engine: command
|
2326 |
+
# command: ['fgrep', '{{QUERY}}']
|
2327 |
+
# shortcut: fgr
|
2328 |
+
# tokens: []
|
2329 |
+
# disabled: true
|
2330 |
+
# delimiter:
|
2331 |
+
# chars: ' '
|
2332 |
+
# keys: ['line']
|
2333 |
+
|
2334 |
+
# Be careful when enabling this engine if you are
|
2335 |
+
# running a public instance. Do not expose any sensitive
|
2336 |
+
# information. You can restrict access by configuring a list
|
2337 |
+
# of access tokens under tokens.
|
2338 |
+
# - name: regex search in files
|
2339 |
+
# engine: command
|
2340 |
+
# command: ['grep', '{{QUERY}}']
|
2341 |
+
# shortcut: gr
|
2342 |
+
# tokens: []
|
2343 |
+
# disabled: true
|
2344 |
+
# delimiter:
|
2345 |
+
# chars: ' '
|
2346 |
+
# keys: ['line']
|
2347 |
+
|
2348 |
+
doi_resolvers:
|
2349 |
+
oadoi.org: 'https://oadoi.org/'
|
2350 |
+
doi.org: 'https://doi.org/'
|
2351 |
+
doai.io: 'https://dissem.in/'
|
2352 |
+
sci-hub.se: 'https://sci-hub.se/'
|
2353 |
+
sci-hub.st: 'https://sci-hub.st/'
|
2354 |
+
sci-hub.ru: 'https://sci-hub.ru/'
|
2355 |
+
|
2356 |
+
default_doi_resolver: 'oadoi.org'
|
searxng.dockerfile
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
FROM searxng/searxng
|
2 |
+
|
3 |
+
COPY searxng-settings.yml /etc/searxng/settings.yml
|
src/Perplexica - Shortcut.lnk
ADDED
Binary file (868 Bytes). View file
|
|
src/agents/academicSearchAgent.ts
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
PromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
MessagesPlaceholder,
|
6 |
+
} from '@langchain/core/prompts';
|
7 |
+
import {
|
8 |
+
RunnableSequence,
|
9 |
+
RunnableMap,
|
10 |
+
RunnableLambda,
|
11 |
+
} from '@langchain/core/runnables';
|
12 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
13 |
+
import { Document } from '@langchain/core/documents';
|
14 |
+
import { searchSearxng } from '../lib/searxng';
|
15 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
16 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
17 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
18 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
19 |
+
import eventEmitter from 'events';
|
20 |
+
import computeSimilarity from '../utils/computeSimilarity';
|
21 |
+
import logger from '../utils/logger';
|
22 |
+
|
23 |
+
const basicAcademicSearchRetrieverPrompt = `
|
24 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
25 |
+
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
26 |
+
|
27 |
+
Example:
|
28 |
+
1. Follow up question: How does stable diffusion work?
|
29 |
+
Rephrased: Stable diffusion working
|
30 |
+
|
31 |
+
2. Follow up question: What is linear algebra?
|
32 |
+
Rephrased: Linear algebra
|
33 |
+
|
34 |
+
3. Follow up question: What is the third law of thermodynamics?
|
35 |
+
Rephrased: Third law of thermodynamics
|
36 |
+
|
37 |
+
Conversation:
|
38 |
+
{chat_history}
|
39 |
+
|
40 |
+
Follow up question: {query}
|
41 |
+
Rephrased question:
|
42 |
+
`;
|
43 |
+
|
44 |
+
const basicAcademicSearchResponsePrompt = `
|
45 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Academic', this means you will be searching for academic papers and articles on the web.
|
46 |
+
|
47 |
+
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
48 |
+
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
49 |
+
You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
|
50 |
+
Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
|
51 |
+
You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
|
52 |
+
Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
|
53 |
+
However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
|
54 |
+
|
55 |
+
Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
|
56 |
+
talk about the context in your response.
|
57 |
+
|
58 |
+
<context>
|
59 |
+
{context}
|
60 |
+
</context>
|
61 |
+
|
62 |
+
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
63 |
+
Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
|
64 |
+
`;
|
65 |
+
|
66 |
+
const strParser = new StringOutputParser();
|
67 |
+
|
68 |
+
const handleStream = async (
|
69 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
70 |
+
emitter: eventEmitter,
|
71 |
+
) => {
|
72 |
+
for await (const event of stream) {
|
73 |
+
if (
|
74 |
+
event.event === 'on_chain_end' &&
|
75 |
+
event.name === 'FinalSourceRetriever'
|
76 |
+
) {
|
77 |
+
emitter.emit(
|
78 |
+
'data',
|
79 |
+
JSON.stringify({ type: 'sources', data: event.data.output }),
|
80 |
+
);
|
81 |
+
}
|
82 |
+
if (
|
83 |
+
event.event === 'on_chain_stream' &&
|
84 |
+
event.name === 'FinalResponseGenerator'
|
85 |
+
) {
|
86 |
+
emitter.emit(
|
87 |
+
'data',
|
88 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
89 |
+
);
|
90 |
+
}
|
91 |
+
if (
|
92 |
+
event.event === 'on_chain_end' &&
|
93 |
+
event.name === 'FinalResponseGenerator'
|
94 |
+
) {
|
95 |
+
emitter.emit('end');
|
96 |
+
}
|
97 |
+
}
|
98 |
+
};
|
99 |
+
|
100 |
+
type BasicChainInput = {
|
101 |
+
chat_history: BaseMessage[];
|
102 |
+
query: string;
|
103 |
+
};
|
104 |
+
|
105 |
+
const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => {
|
106 |
+
return RunnableSequence.from([
|
107 |
+
PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt),
|
108 |
+
llm,
|
109 |
+
strParser,
|
110 |
+
RunnableLambda.from(async (input: string) => {
|
111 |
+
if (input === 'not_needed') {
|
112 |
+
return { query: '', docs: [] };
|
113 |
+
}
|
114 |
+
|
115 |
+
const res = await searchSearxng(input, {
|
116 |
+
language: 'en',
|
117 |
+
engines: [
|
118 |
+
'arxiv',
|
119 |
+
'google scholar',
|
120 |
+
'internetarchivescholar',
|
121 |
+
'pubmed',
|
122 |
+
],
|
123 |
+
});
|
124 |
+
|
125 |
+
const documents = res.results.map(
|
126 |
+
(result) =>
|
127 |
+
new Document({
|
128 |
+
pageContent: result.content,
|
129 |
+
metadata: {
|
130 |
+
title: result.title,
|
131 |
+
url: result.url,
|
132 |
+
...(result.img_src && { img_src: result.img_src }),
|
133 |
+
},
|
134 |
+
}),
|
135 |
+
);
|
136 |
+
|
137 |
+
return { query: input, docs: documents };
|
138 |
+
}),
|
139 |
+
]);
|
140 |
+
};
|
141 |
+
|
142 |
+
const createBasicAcademicSearchAnsweringChain = (
|
143 |
+
llm: BaseChatModel,
|
144 |
+
embeddings: Embeddings,
|
145 |
+
) => {
|
146 |
+
const basicAcademicSearchRetrieverChain =
|
147 |
+
createBasicAcademicSearchRetrieverChain(llm);
|
148 |
+
|
149 |
+
const processDocs = async (docs: Document[]) => {
|
150 |
+
return docs
|
151 |
+
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
152 |
+
.join('\n');
|
153 |
+
};
|
154 |
+
|
155 |
+
const rerankDocs = async ({
|
156 |
+
query,
|
157 |
+
docs,
|
158 |
+
}: {
|
159 |
+
query: string;
|
160 |
+
docs: Document[];
|
161 |
+
}) => {
|
162 |
+
if (docs.length === 0) {
|
163 |
+
return docs;
|
164 |
+
}
|
165 |
+
|
166 |
+
const docsWithContent = docs.filter(
|
167 |
+
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
168 |
+
);
|
169 |
+
|
170 |
+
const [docEmbeddings, queryEmbedding] = await Promise.all([
|
171 |
+
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
|
172 |
+
embeddings.embedQuery(query),
|
173 |
+
]);
|
174 |
+
|
175 |
+
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
176 |
+
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
177 |
+
|
178 |
+
return {
|
179 |
+
index: i,
|
180 |
+
similarity: sim,
|
181 |
+
};
|
182 |
+
});
|
183 |
+
|
184 |
+
const sortedDocs = similarity
|
185 |
+
.sort((a, b) => b.similarity - a.similarity)
|
186 |
+
.slice(0, 15)
|
187 |
+
.map((sim) => docsWithContent[sim.index]);
|
188 |
+
|
189 |
+
return sortedDocs;
|
190 |
+
};
|
191 |
+
|
192 |
+
return RunnableSequence.from([
|
193 |
+
RunnableMap.from({
|
194 |
+
query: (input: BasicChainInput) => input.query,
|
195 |
+
chat_history: (input: BasicChainInput) => input.chat_history,
|
196 |
+
context: RunnableSequence.from([
|
197 |
+
(input) => ({
|
198 |
+
query: input.query,
|
199 |
+
chat_history: formatChatHistoryAsString(input.chat_history),
|
200 |
+
}),
|
201 |
+
basicAcademicSearchRetrieverChain
|
202 |
+
.pipe(rerankDocs)
|
203 |
+
.withConfig({
|
204 |
+
runName: 'FinalSourceRetriever',
|
205 |
+
})
|
206 |
+
.pipe(processDocs),
|
207 |
+
]),
|
208 |
+
}),
|
209 |
+
ChatPromptTemplate.fromMessages([
|
210 |
+
['system', basicAcademicSearchResponsePrompt],
|
211 |
+
new MessagesPlaceholder('chat_history'),
|
212 |
+
['user', '{query}'],
|
213 |
+
]),
|
214 |
+
llm,
|
215 |
+
strParser,
|
216 |
+
]).withConfig({
|
217 |
+
runName: 'FinalResponseGenerator',
|
218 |
+
});
|
219 |
+
};
|
220 |
+
|
221 |
+
const basicAcademicSearch = (
|
222 |
+
query: string,
|
223 |
+
history: BaseMessage[],
|
224 |
+
llm: BaseChatModel,
|
225 |
+
embeddings: Embeddings,
|
226 |
+
) => {
|
227 |
+
const emitter = new eventEmitter();
|
228 |
+
|
229 |
+
try {
|
230 |
+
const basicAcademicSearchAnsweringChain =
|
231 |
+
createBasicAcademicSearchAnsweringChain(llm, embeddings);
|
232 |
+
|
233 |
+
const stream = basicAcademicSearchAnsweringChain.streamEvents(
|
234 |
+
{
|
235 |
+
chat_history: history,
|
236 |
+
query: query,
|
237 |
+
},
|
238 |
+
{
|
239 |
+
version: 'v1',
|
240 |
+
},
|
241 |
+
);
|
242 |
+
|
243 |
+
handleStream(stream, emitter);
|
244 |
+
} catch (err) {
|
245 |
+
emitter.emit(
|
246 |
+
'error',
|
247 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
248 |
+
);
|
249 |
+
logger.error(`Error in academic search: ${err}`);
|
250 |
+
}
|
251 |
+
|
252 |
+
return emitter;
|
253 |
+
};
|
254 |
+
|
255 |
+
const handleAcademicSearch = (
|
256 |
+
message: string,
|
257 |
+
history: BaseMessage[],
|
258 |
+
llm: BaseChatModel,
|
259 |
+
embeddings: Embeddings,
|
260 |
+
) => {
|
261 |
+
const emitter = basicAcademicSearch(message, history, llm, embeddings);
|
262 |
+
return emitter;
|
263 |
+
};
|
264 |
+
|
265 |
+
export default handleAcademicSearch;
|
src/agents/imageSearchAgent.ts
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import {
|
2 |
+
RunnableSequence,
|
3 |
+
RunnableMap,
|
4 |
+
RunnableLambda,
|
5 |
+
} from '@langchain/core/runnables';
|
6 |
+
import { PromptTemplate } from '@langchain/core/prompts';
|
7 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
8 |
+
import { BaseMessage } from '@langchain/core/messages';
|
9 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
10 |
+
import { searchSearxng } from '../lib/searxng';
|
11 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
12 |
+
|
13 |
+
const imageSearchChainPrompt = `
|
14 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
|
15 |
+
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
|
16 |
+
|
17 |
+
Example:
|
18 |
+
1. Follow up question: What is a cat?
|
19 |
+
Rephrased: A cat
|
20 |
+
|
21 |
+
2. Follow up question: What is a car? How does it works?
|
22 |
+
Rephrased: Car working
|
23 |
+
|
24 |
+
3. Follow up question: How does an AC work?
|
25 |
+
Rephrased: AC working
|
26 |
+
|
27 |
+
Conversation:
|
28 |
+
{chat_history}
|
29 |
+
|
30 |
+
Follow up question: {query}
|
31 |
+
Rephrased question:
|
32 |
+
`;
|
33 |
+
|
34 |
+
type ImageSearchChainInput = {
|
35 |
+
chat_history: BaseMessage[];
|
36 |
+
query: string;
|
37 |
+
};
|
38 |
+
|
39 |
+
const strParser = new StringOutputParser();
|
40 |
+
|
41 |
+
const createImageSearchChain = (llm: BaseChatModel) => {
|
42 |
+
return RunnableSequence.from([
|
43 |
+
RunnableMap.from({
|
44 |
+
chat_history: (input: ImageSearchChainInput) => {
|
45 |
+
return formatChatHistoryAsString(input.chat_history);
|
46 |
+
},
|
47 |
+
query: (input: ImageSearchChainInput) => {
|
48 |
+
return input.query;
|
49 |
+
},
|
50 |
+
}),
|
51 |
+
PromptTemplate.fromTemplate(imageSearchChainPrompt),
|
52 |
+
llm,
|
53 |
+
strParser,
|
54 |
+
RunnableLambda.from(async (input: string) => {
|
55 |
+
const res = await searchSearxng(input, {
|
56 |
+
engines: ['bing images', 'google images'],
|
57 |
+
});
|
58 |
+
|
59 |
+
const images = [];
|
60 |
+
|
61 |
+
res.results.forEach((result) => {
|
62 |
+
if (result.img_src && result.url && result.title) {
|
63 |
+
images.push({
|
64 |
+
img_src: result.img_src,
|
65 |
+
url: result.url,
|
66 |
+
title: result.title,
|
67 |
+
});
|
68 |
+
}
|
69 |
+
});
|
70 |
+
|
71 |
+
return images.slice(0, 10);
|
72 |
+
}),
|
73 |
+
]);
|
74 |
+
};
|
75 |
+
|
76 |
+
const handleImageSearch = (
|
77 |
+
input: ImageSearchChainInput,
|
78 |
+
llm: BaseChatModel,
|
79 |
+
) => {
|
80 |
+
const imageSearchChain = createImageSearchChain(llm);
|
81 |
+
return imageSearchChain.invoke(input);
|
82 |
+
};
|
83 |
+
|
84 |
+
export default handleImageSearch;
|
src/agents/redditSearchAgent.ts
ADDED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
PromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
MessagesPlaceholder,
|
6 |
+
} from '@langchain/core/prompts';
|
7 |
+
import {
|
8 |
+
RunnableSequence,
|
9 |
+
RunnableMap,
|
10 |
+
RunnableLambda,
|
11 |
+
} from '@langchain/core/runnables';
|
12 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
13 |
+
import { Document } from '@langchain/core/documents';
|
14 |
+
import { searchSearxng } from '../lib/searxng';
|
15 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
16 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
17 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
18 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
19 |
+
import eventEmitter from 'events';
|
20 |
+
import computeSimilarity from '../utils/computeSimilarity';
|
21 |
+
import logger from '../utils/logger';
|
22 |
+
|
23 |
+
const basicRedditSearchRetrieverPrompt = `
|
24 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
25 |
+
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
26 |
+
|
27 |
+
Example:
|
28 |
+
1. Follow up question: Which company is most likely to create an AGI
|
29 |
+
Rephrased: Which company is most likely to create an AGI
|
30 |
+
|
31 |
+
2. Follow up question: Is Earth flat?
|
32 |
+
Rephrased: Is Earth flat?
|
33 |
+
|
34 |
+
3. Follow up question: Is there life on Mars?
|
35 |
+
Rephrased: Is there life on Mars?
|
36 |
+
|
37 |
+
Conversation:
|
38 |
+
{chat_history}
|
39 |
+
|
40 |
+
Follow up question: {query}
|
41 |
+
Rephrased question:
|
42 |
+
`;
|
43 |
+
|
44 |
+
const basicRedditSearchResponsePrompt = `
|
45 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Reddit', this means you will be searching for information, opinions and discussions on the web using Reddit.
|
46 |
+
|
47 |
+
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
48 |
+
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
49 |
+
You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
|
50 |
+
Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
|
51 |
+
You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
|
52 |
+
Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
|
53 |
+
However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
|
54 |
+
|
55 |
+
Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Reddit and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
|
56 |
+
talk about the context in your response.
|
57 |
+
|
58 |
+
<context>
|
59 |
+
{context}
|
60 |
+
</context>
|
61 |
+
|
62 |
+
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
63 |
+
Anything between the \`context\` is retrieved from Reddit and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
|
64 |
+
`;
|
65 |
+
|
66 |
+
const strParser = new StringOutputParser();
|
67 |
+
|
68 |
+
const handleStream = async (
|
69 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
70 |
+
emitter: eventEmitter,
|
71 |
+
) => {
|
72 |
+
for await (const event of stream) {
|
73 |
+
if (
|
74 |
+
event.event === 'on_chain_end' &&
|
75 |
+
event.name === 'FinalSourceRetriever'
|
76 |
+
) {
|
77 |
+
emitter.emit(
|
78 |
+
'data',
|
79 |
+
JSON.stringify({ type: 'sources', data: event.data.output }),
|
80 |
+
);
|
81 |
+
}
|
82 |
+
if (
|
83 |
+
event.event === 'on_chain_stream' &&
|
84 |
+
event.name === 'FinalResponseGenerator'
|
85 |
+
) {
|
86 |
+
emitter.emit(
|
87 |
+
'data',
|
88 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
89 |
+
);
|
90 |
+
}
|
91 |
+
if (
|
92 |
+
event.event === 'on_chain_end' &&
|
93 |
+
event.name === 'FinalResponseGenerator'
|
94 |
+
) {
|
95 |
+
emitter.emit('end');
|
96 |
+
}
|
97 |
+
}
|
98 |
+
};
|
99 |
+
|
100 |
+
type BasicChainInput = {
|
101 |
+
chat_history: BaseMessage[];
|
102 |
+
query: string;
|
103 |
+
};
|
104 |
+
|
105 |
+
const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
|
106 |
+
return RunnableSequence.from([
|
107 |
+
PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt),
|
108 |
+
llm,
|
109 |
+
strParser,
|
110 |
+
RunnableLambda.from(async (input: string) => {
|
111 |
+
if (input === 'not_needed') {
|
112 |
+
return { query: '', docs: [] };
|
113 |
+
}
|
114 |
+
|
115 |
+
const res = await searchSearxng(input, {
|
116 |
+
language: 'en',
|
117 |
+
engines: ['reddit'],
|
118 |
+
});
|
119 |
+
|
120 |
+
const documents = res.results.map(
|
121 |
+
(result) =>
|
122 |
+
new Document({
|
123 |
+
pageContent: result.content ? result.content : result.title,
|
124 |
+
metadata: {
|
125 |
+
title: result.title,
|
126 |
+
url: result.url,
|
127 |
+
...(result.img_src && { img_src: result.img_src }),
|
128 |
+
},
|
129 |
+
}),
|
130 |
+
);
|
131 |
+
|
132 |
+
return { query: input, docs: documents };
|
133 |
+
}),
|
134 |
+
]);
|
135 |
+
};
|
136 |
+
|
137 |
+
const createBasicRedditSearchAnsweringChain = (
|
138 |
+
llm: BaseChatModel,
|
139 |
+
embeddings: Embeddings,
|
140 |
+
) => {
|
141 |
+
const basicRedditSearchRetrieverChain =
|
142 |
+
createBasicRedditSearchRetrieverChain(llm);
|
143 |
+
|
144 |
+
const processDocs = async (docs: Document[]) => {
|
145 |
+
return docs
|
146 |
+
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
147 |
+
.join('\n');
|
148 |
+
};
|
149 |
+
|
150 |
+
const rerankDocs = async ({
|
151 |
+
query,
|
152 |
+
docs,
|
153 |
+
}: {
|
154 |
+
query: string;
|
155 |
+
docs: Document[];
|
156 |
+
}) => {
|
157 |
+
if (docs.length === 0) {
|
158 |
+
return docs;
|
159 |
+
}
|
160 |
+
|
161 |
+
const docsWithContent = docs.filter(
|
162 |
+
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
163 |
+
);
|
164 |
+
|
165 |
+
const [docEmbeddings, queryEmbedding] = await Promise.all([
|
166 |
+
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
|
167 |
+
embeddings.embedQuery(query),
|
168 |
+
]);
|
169 |
+
|
170 |
+
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
171 |
+
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
172 |
+
|
173 |
+
return {
|
174 |
+
index: i,
|
175 |
+
similarity: sim,
|
176 |
+
};
|
177 |
+
});
|
178 |
+
|
179 |
+
const sortedDocs = similarity
|
180 |
+
.sort((a, b) => b.similarity - a.similarity)
|
181 |
+
.slice(0, 15)
|
182 |
+
.filter((sim) => sim.similarity > 0.3)
|
183 |
+
.map((sim) => docsWithContent[sim.index]);
|
184 |
+
|
185 |
+
return sortedDocs;
|
186 |
+
};
|
187 |
+
|
188 |
+
return RunnableSequence.from([
|
189 |
+
RunnableMap.from({
|
190 |
+
query: (input: BasicChainInput) => input.query,
|
191 |
+
chat_history: (input: BasicChainInput) => input.chat_history,
|
192 |
+
context: RunnableSequence.from([
|
193 |
+
(input) => ({
|
194 |
+
query: input.query,
|
195 |
+
chat_history: formatChatHistoryAsString(input.chat_history),
|
196 |
+
}),
|
197 |
+
basicRedditSearchRetrieverChain
|
198 |
+
.pipe(rerankDocs)
|
199 |
+
.withConfig({
|
200 |
+
runName: 'FinalSourceRetriever',
|
201 |
+
})
|
202 |
+
.pipe(processDocs),
|
203 |
+
]),
|
204 |
+
}),
|
205 |
+
ChatPromptTemplate.fromMessages([
|
206 |
+
['system', basicRedditSearchResponsePrompt],
|
207 |
+
new MessagesPlaceholder('chat_history'),
|
208 |
+
['user', '{query}'],
|
209 |
+
]),
|
210 |
+
llm,
|
211 |
+
strParser,
|
212 |
+
]).withConfig({
|
213 |
+
runName: 'FinalResponseGenerator',
|
214 |
+
});
|
215 |
+
};
|
216 |
+
|
217 |
+
const basicRedditSearch = (
|
218 |
+
query: string,
|
219 |
+
history: BaseMessage[],
|
220 |
+
llm: BaseChatModel,
|
221 |
+
embeddings: Embeddings,
|
222 |
+
) => {
|
223 |
+
const emitter = new eventEmitter();
|
224 |
+
|
225 |
+
try {
|
226 |
+
const basicRedditSearchAnsweringChain =
|
227 |
+
createBasicRedditSearchAnsweringChain(llm, embeddings);
|
228 |
+
const stream = basicRedditSearchAnsweringChain.streamEvents(
|
229 |
+
{
|
230 |
+
chat_history: history,
|
231 |
+
query: query,
|
232 |
+
},
|
233 |
+
{
|
234 |
+
version: 'v1',
|
235 |
+
},
|
236 |
+
);
|
237 |
+
|
238 |
+
handleStream(stream, emitter);
|
239 |
+
} catch (err) {
|
240 |
+
emitter.emit(
|
241 |
+
'error',
|
242 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
243 |
+
);
|
244 |
+
logger.error(`Error in RedditSearch: ${err}`);
|
245 |
+
}
|
246 |
+
|
247 |
+
return emitter;
|
248 |
+
};
|
249 |
+
|
250 |
+
const handleRedditSearch = (
|
251 |
+
message: string,
|
252 |
+
history: BaseMessage[],
|
253 |
+
llm: BaseChatModel,
|
254 |
+
embeddings: Embeddings,
|
255 |
+
) => {
|
256 |
+
const emitter = basicRedditSearch(message, history, llm, embeddings);
|
257 |
+
return emitter;
|
258 |
+
};
|
259 |
+
|
260 |
+
export default handleRedditSearch;
|
src/agents/videoSearchAgent.ts
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import {
|
2 |
+
RunnableSequence,
|
3 |
+
RunnableMap,
|
4 |
+
RunnableLambda,
|
5 |
+
} from '@langchain/core/runnables';
|
6 |
+
import { PromptTemplate } from '@langchain/core/prompts';
|
7 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
8 |
+
import { BaseMessage } from '@langchain/core/messages';
|
9 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
10 |
+
import { searchSearxng } from '../lib/searxng';
|
11 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
12 |
+
|
13 |
+
const VideoSearchChainPrompt = `
|
14 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
|
15 |
+
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
|
16 |
+
|
17 |
+
Example:
|
18 |
+
1. Follow up question: How does a car work?
|
19 |
+
Rephrased: How does a car work?
|
20 |
+
|
21 |
+
2. Follow up question: What is the theory of relativity?
|
22 |
+
Rephrased: What is theory of relativity
|
23 |
+
|
24 |
+
3. Follow up question: How does an AC work?
|
25 |
+
Rephrased: How does an AC work
|
26 |
+
|
27 |
+
Conversation:
|
28 |
+
{chat_history}
|
29 |
+
|
30 |
+
Follow up question: {query}
|
31 |
+
Rephrased question:
|
32 |
+
`;
|
33 |
+
|
34 |
+
type VideoSearchChainInput = {
|
35 |
+
chat_history: BaseMessage[];
|
36 |
+
query: string;
|
37 |
+
};
|
38 |
+
|
39 |
+
const strParser = new StringOutputParser();
|
40 |
+
|
41 |
+
const createVideoSearchChain = (llm: BaseChatModel) => {
|
42 |
+
return RunnableSequence.from([
|
43 |
+
RunnableMap.from({
|
44 |
+
chat_history: (input: VideoSearchChainInput) => {
|
45 |
+
return formatChatHistoryAsString(input.chat_history);
|
46 |
+
},
|
47 |
+
query: (input: VideoSearchChainInput) => {
|
48 |
+
return input.query;
|
49 |
+
},
|
50 |
+
}),
|
51 |
+
PromptTemplate.fromTemplate(VideoSearchChainPrompt),
|
52 |
+
llm,
|
53 |
+
strParser,
|
54 |
+
RunnableLambda.from(async (input: string) => {
|
55 |
+
const res = await searchSearxng(input, {
|
56 |
+
engines: ['youtube'],
|
57 |
+
});
|
58 |
+
|
59 |
+
const videos = [];
|
60 |
+
|
61 |
+
res.results.forEach((result) => {
|
62 |
+
if (
|
63 |
+
result.thumbnail &&
|
64 |
+
result.url &&
|
65 |
+
result.title &&
|
66 |
+
result.iframe_src
|
67 |
+
) {
|
68 |
+
videos.push({
|
69 |
+
img_src: result.thumbnail,
|
70 |
+
url: result.url,
|
71 |
+
title: result.title,
|
72 |
+
iframe_src: result.iframe_src,
|
73 |
+
});
|
74 |
+
}
|
75 |
+
});
|
76 |
+
|
77 |
+
return videos.slice(0, 10);
|
78 |
+
}),
|
79 |
+
]);
|
80 |
+
};
|
81 |
+
|
82 |
+
const handleVideoSearch = (
|
83 |
+
input: VideoSearchChainInput,
|
84 |
+
llm: BaseChatModel,
|
85 |
+
) => {
|
86 |
+
const VideoSearchChain = createVideoSearchChain(llm);
|
87 |
+
return VideoSearchChain.invoke(input);
|
88 |
+
};
|
89 |
+
|
90 |
+
export default handleVideoSearch;
|
src/agents/webSearchAgent.ts
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
PromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
MessagesPlaceholder,
|
6 |
+
} from '@langchain/core/prompts';
|
7 |
+
import {
|
8 |
+
RunnableSequence,
|
9 |
+
RunnableMap,
|
10 |
+
RunnableLambda,
|
11 |
+
} from '@langchain/core/runnables';
|
12 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
13 |
+
import { Document } from '@langchain/core/documents';
|
14 |
+
import { searchSearxng } from '../lib/searxng';
|
15 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
16 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
17 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
18 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
19 |
+
import eventEmitter from 'events';
|
20 |
+
import computeSimilarity from '../utils/computeSimilarity';
|
21 |
+
import logger from '../utils/logger';
|
22 |
+
|
23 |
+
const basicSearchRetrieverPrompt = `
|
24 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
25 |
+
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
26 |
+
|
27 |
+
Example:
|
28 |
+
1. Follow up question: What is the capital of France?
|
29 |
+
Rephrased: Capital of france
|
30 |
+
|
31 |
+
2. Follow up question: What is the population of New York City?
|
32 |
+
Rephrased: Population of New York City
|
33 |
+
|
34 |
+
3. Follow up question: What is Docker?
|
35 |
+
Rephrased: What is Docker
|
36 |
+
|
37 |
+
Conversation:
|
38 |
+
{chat_history}
|
39 |
+
|
40 |
+
Follow up question: {query}
|
41 |
+
Rephrased question:
|
42 |
+
`;
|
43 |
+
|
44 |
+
const basicWebSearchResponsePrompt = `
|
45 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries.
|
46 |
+
|
47 |
+
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
48 |
+
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
49 |
+
You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
|
50 |
+
Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
|
51 |
+
You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
|
52 |
+
Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
|
53 |
+
However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
|
54 |
+
|
55 |
+
Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
|
56 |
+
talk about the context in your response.
|
57 |
+
|
58 |
+
<context>
|
59 |
+
{context}
|
60 |
+
</context>
|
61 |
+
|
62 |
+
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
63 |
+
Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
|
64 |
+
`;
|
65 |
+
|
66 |
+
const strParser = new StringOutputParser();
|
67 |
+
|
68 |
+
const handleStream = async (
|
69 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
70 |
+
emitter: eventEmitter,
|
71 |
+
) => {
|
72 |
+
for await (const event of stream) {
|
73 |
+
if (
|
74 |
+
event.event === 'on_chain_end' &&
|
75 |
+
event.name === 'FinalSourceRetriever'
|
76 |
+
) {
|
77 |
+
emitter.emit(
|
78 |
+
'data',
|
79 |
+
JSON.stringify({ type: 'sources', data: event.data.output }),
|
80 |
+
);
|
81 |
+
}
|
82 |
+
if (
|
83 |
+
event.event === 'on_chain_stream' &&
|
84 |
+
event.name === 'FinalResponseGenerator'
|
85 |
+
) {
|
86 |
+
emitter.emit(
|
87 |
+
'data',
|
88 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
89 |
+
);
|
90 |
+
}
|
91 |
+
if (
|
92 |
+
event.event === 'on_chain_end' &&
|
93 |
+
event.name === 'FinalResponseGenerator'
|
94 |
+
) {
|
95 |
+
emitter.emit('end');
|
96 |
+
}
|
97 |
+
}
|
98 |
+
};
|
99 |
+
|
100 |
+
type BasicChainInput = {
|
101 |
+
chat_history: BaseMessage[];
|
102 |
+
query: string;
|
103 |
+
};
|
104 |
+
|
105 |
+
const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
|
106 |
+
return RunnableSequence.from([
|
107 |
+
PromptTemplate.fromTemplate(basicSearchRetrieverPrompt),
|
108 |
+
llm,
|
109 |
+
strParser,
|
110 |
+
RunnableLambda.from(async (input: string) => {
|
111 |
+
if (input === 'not_needed') {
|
112 |
+
return { query: '', docs: [] };
|
113 |
+
}
|
114 |
+
|
115 |
+
const res = await searchSearxng(input, {
|
116 |
+
language: 'en',
|
117 |
+
});
|
118 |
+
|
119 |
+
const documents = res.results.map(
|
120 |
+
(result) =>
|
121 |
+
new Document({
|
122 |
+
pageContent: result.content,
|
123 |
+
metadata: {
|
124 |
+
title: result.title,
|
125 |
+
url: result.url,
|
126 |
+
...(result.img_src && { img_src: result.img_src }),
|
127 |
+
},
|
128 |
+
}),
|
129 |
+
);
|
130 |
+
|
131 |
+
return { query: input, docs: documents };
|
132 |
+
}),
|
133 |
+
]);
|
134 |
+
};
|
135 |
+
|
136 |
+
const createBasicWebSearchAnsweringChain = (
|
137 |
+
llm: BaseChatModel,
|
138 |
+
embeddings: Embeddings,
|
139 |
+
) => {
|
140 |
+
const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm);
|
141 |
+
|
142 |
+
const processDocs = async (docs: Document[]) => {
|
143 |
+
return docs
|
144 |
+
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
|
145 |
+
.join('\n');
|
146 |
+
};
|
147 |
+
|
148 |
+
const rerankDocs = async ({
|
149 |
+
query,
|
150 |
+
docs,
|
151 |
+
}: {
|
152 |
+
query: string;
|
153 |
+
docs: Document[];
|
154 |
+
}) => {
|
155 |
+
if (docs.length === 0) {
|
156 |
+
return docs;
|
157 |
+
}
|
158 |
+
|
159 |
+
const docsWithContent = docs.filter(
|
160 |
+
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
161 |
+
);
|
162 |
+
|
163 |
+
const [docEmbeddings, queryEmbedding] = await Promise.all([
|
164 |
+
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
|
165 |
+
embeddings.embedQuery(query),
|
166 |
+
]);
|
167 |
+
|
168 |
+
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
169 |
+
const sim = computeSimilarity(queryEmbedding, docEmbedding);
|
170 |
+
|
171 |
+
return {
|
172 |
+
index: i,
|
173 |
+
similarity: sim,
|
174 |
+
};
|
175 |
+
});
|
176 |
+
|
177 |
+
const sortedDocs = similarity
|
178 |
+
.sort((a, b) => b.similarity - a.similarity)
|
179 |
+
.filter((sim) => sim.similarity > 0.5)
|
180 |
+
.slice(0, 15)
|
181 |
+
.map((sim) => docsWithContent[sim.index]);
|
182 |
+
|
183 |
+
return sortedDocs;
|
184 |
+
};
|
185 |
+
|
186 |
+
return RunnableSequence.from([
|
187 |
+
RunnableMap.from({
|
188 |
+
query: (input: BasicChainInput) => input.query,
|
189 |
+
chat_history: (input: BasicChainInput) => input.chat_history,
|
190 |
+
context: RunnableSequence.from([
|
191 |
+
(input) => ({
|
192 |
+
query: input.query,
|
193 |
+
chat_history: formatChatHistoryAsString(input.chat_history),
|
194 |
+
}),
|
195 |
+
basicWebSearchRetrieverChain
|
196 |
+
.pipe(rerankDocs)
|
197 |
+
.withConfig({
|
198 |
+
runName: 'FinalSourceRetriever',
|
199 |
+
})
|
200 |
+
.pipe(processDocs),
|
201 |
+
]),
|
202 |
+
}),
|
203 |
+
ChatPromptTemplate.fromMessages([
|
204 |
+
['system', basicWebSearchResponsePrompt],
|
205 |
+
new MessagesPlaceholder('chat_history'),
|
206 |
+
['user', '{query}'],
|
207 |
+
]),
|
208 |
+
llm,
|
209 |
+
strParser,
|
210 |
+
]).withConfig({
|
211 |
+
runName: 'FinalResponseGenerator',
|
212 |
+
});
|
213 |
+
};
|
214 |
+
|
215 |
+
const basicWebSearch = (
|
216 |
+
query: string,
|
217 |
+
history: BaseMessage[],
|
218 |
+
llm: BaseChatModel,
|
219 |
+
embeddings: Embeddings,
|
220 |
+
) => {
|
221 |
+
const emitter = new eventEmitter();
|
222 |
+
|
223 |
+
try {
|
224 |
+
const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain(
|
225 |
+
llm,
|
226 |
+
embeddings,
|
227 |
+
);
|
228 |
+
|
229 |
+
const stream = basicWebSearchAnsweringChain.streamEvents(
|
230 |
+
{
|
231 |
+
chat_history: history,
|
232 |
+
query: query,
|
233 |
+
},
|
234 |
+
{
|
235 |
+
version: 'v1',
|
236 |
+
},
|
237 |
+
);
|
238 |
+
|
239 |
+
handleStream(stream, emitter);
|
240 |
+
} catch (err) {
|
241 |
+
emitter.emit(
|
242 |
+
'error',
|
243 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
244 |
+
);
|
245 |
+
logger.error(`Error in websearch: ${err}`);
|
246 |
+
}
|
247 |
+
|
248 |
+
return emitter;
|
249 |
+
};
|
250 |
+
|
251 |
+
const handleWebSearch = (
|
252 |
+
message: string,
|
253 |
+
history: BaseMessage[],
|
254 |
+
llm: BaseChatModel,
|
255 |
+
embeddings: Embeddings,
|
256 |
+
) => {
|
257 |
+
const emitter = basicWebSearch(message, history, llm, embeddings);
|
258 |
+
return emitter;
|
259 |
+
};
|
260 |
+
|
261 |
+
export default handleWebSearch;
|
src/agents/wolframAlphaSearchAgent.ts
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
PromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
MessagesPlaceholder,
|
6 |
+
} from '@langchain/core/prompts';
|
7 |
+
import {
|
8 |
+
RunnableSequence,
|
9 |
+
RunnableMap,
|
10 |
+
RunnableLambda,
|
11 |
+
} from '@langchain/core/runnables';
|
12 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
13 |
+
import { Document } from '@langchain/core/documents';
|
14 |
+
import { searchSearxng } from '../lib/searxng';
|
15 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
16 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
17 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
18 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
19 |
+
import eventEmitter from 'events';
|
20 |
+
import logger from '../utils/logger';
|
21 |
+
|
22 |
+
const basicWolframAlphaSearchRetrieverPrompt = `
|
23 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
24 |
+
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
25 |
+
|
26 |
+
Example:
|
27 |
+
1. Follow up question: What is the atomic radius of S?
|
28 |
+
Rephrased: Atomic radius of S
|
29 |
+
|
30 |
+
2. Follow up question: What is linear algebra?
|
31 |
+
Rephrased: Linear algebra
|
32 |
+
|
33 |
+
3. Follow up question: What is the third law of thermodynamics?
|
34 |
+
Rephrased: Third law of thermodynamics
|
35 |
+
|
36 |
+
Conversation:
|
37 |
+
{chat_history}
|
38 |
+
|
39 |
+
Follow up question: {query}
|
40 |
+
Rephrased question:
|
41 |
+
`;
|
42 |
+
|
43 |
+
const basicWolframAlphaSearchResponsePrompt = `
|
44 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Wolfram Alpha', this means you will be searching for information on the web using Wolfram Alpha. It is a computational knowledge engine that can answer factual queries and perform computations.
|
45 |
+
|
46 |
+
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
47 |
+
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
48 |
+
You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
|
49 |
+
Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
|
50 |
+
You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
|
51 |
+
Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
|
52 |
+
However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
|
53 |
+
|
54 |
+
Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Wolfram Alpha and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
|
55 |
+
talk about the context in your response.
|
56 |
+
|
57 |
+
<context>
|
58 |
+
{context}
|
59 |
+
</context>
|
60 |
+
|
61 |
+
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
62 |
+
Anything between the \`context\` is retrieved from Wolfram Alpha and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
|
63 |
+
`;
|
64 |
+
|
65 |
+
const strParser = new StringOutputParser();
|
66 |
+
|
67 |
+
const handleStream = async (
|
68 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
69 |
+
emitter: eventEmitter,
|
70 |
+
) => {
|
71 |
+
for await (const event of stream) {
|
72 |
+
if (
|
73 |
+
event.event === 'on_chain_end' &&
|
74 |
+
event.name === 'FinalSourceRetriever'
|
75 |
+
) {
|
76 |
+
emitter.emit(
|
77 |
+
'data',
|
78 |
+
JSON.stringify({ type: 'sources', data: event.data.output }),
|
79 |
+
);
|
80 |
+
}
|
81 |
+
if (
|
82 |
+
event.event === 'on_chain_stream' &&
|
83 |
+
event.name === 'FinalResponseGenerator'
|
84 |
+
) {
|
85 |
+
emitter.emit(
|
86 |
+
'data',
|
87 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
88 |
+
);
|
89 |
+
}
|
90 |
+
if (
|
91 |
+
event.event === 'on_chain_end' &&
|
92 |
+
event.name === 'FinalResponseGenerator'
|
93 |
+
) {
|
94 |
+
emitter.emit('end');
|
95 |
+
}
|
96 |
+
}
|
97 |
+
};
|
98 |
+
|
99 |
+
type BasicChainInput = {
|
100 |
+
chat_history: BaseMessage[];
|
101 |
+
query: string;
|
102 |
+
};
|
103 |
+
|
104 |
+
/**
 * Builds the retriever stage for the Wolfram Alpha focus mode.
 *
 * Pipeline: rephrase the follow-up question into a standalone query (LLM +
 * retriever prompt), then search SearxNG restricted to the 'wolframalpha'
 * engine, and wrap each result in a LangChain Document.
 *
 * Resolves to `{ query, docs }`; `{ query: '', docs: [] }` when the LLM
 * answers with the 'not_needed' sentinel (greeting / writing task).
 */
const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => {
  return RunnableSequence.from([
    PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt),
    llm,
    strParser,
    RunnableLambda.from(async (input: string) => {
      // 'not_needed' is the sentinel the retriever prompt instructs the LLM
      // to return when no web search should be performed.
      if (input === 'not_needed') {
        return { query: '', docs: [] };
      }

      const res = await searchSearxng(input, {
        language: 'en',
        engines: ['wolframalpha'],
      });

      const documents = res.results.map(
        (result) =>
          new Document({
            // NOTE(review): result.content is optional in the SearxNG result
            // type; assumed present for wolframalpha results — confirm.
            pageContent: result.content,
            metadata: {
              title: result.title,
              url: result.url,
              // Only attach img_src when the engine returned one.
              ...(result.img_src && { img_src: result.img_src }),
            },
          }),
      );

      return { query: input, docs: documents };
    }),
  ]);
};
|
135 |
+
|
136 |
+
/**
 * Builds the full answering chain for the Wolfram Alpha focus mode.
 *
 * A RunnableMap passes `query` and `chat_history` through unchanged and
 * computes `context` by running the retriever chain, extracting its docs
 * (tagged 'FinalSourceRetriever' so handleStream can emit them as sources),
 * and flattening them into a numbered text list. The map output feeds the
 * response prompt + LLM + string parser, tagged 'FinalResponseGenerator'.
 */
const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
  const basicWolframAlphaSearchRetrieverChain =
    createBasicWolframAlphaSearchRetrieverChain(llm);

  // Render docs as "1. <content>\n2. <content>..." — the numbering the
  // response prompt's [number] citation format refers to.
  const processDocs = (docs: Document[]) => {
    return docs
      .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
      .join('\n');
  };

  return RunnableSequence.from([
    RunnableMap.from({
      query: (input: BasicChainInput) => input.query,
      chat_history: (input: BasicChainInput) => input.chat_history,
      context: RunnableSequence.from([
        // The retriever prompt expects chat history as plain text.
        (input) => ({
          query: input.query,
          chat_history: formatChatHistoryAsString(input.chat_history),
        }),
        basicWolframAlphaSearchRetrieverChain
          .pipe(({ query, docs }) => {
            return docs;
          })
          .withConfig({
            // Name consumed by handleStream to emit the 'sources' payload.
            runName: 'FinalSourceRetriever',
          })
          .pipe(processDocs),
      ]),
    }),
    ChatPromptTemplate.fromMessages([
      ['system', basicWolframAlphaSearchResponsePrompt],
      new MessagesPlaceholder('chat_history'),
      ['user', '{query}'],
    ]),
    llm,
    strParser,
  ]).withConfig({
    // Name consumed by handleStream to stream the answer and signal 'end'.
    runName: 'FinalResponseGenerator',
  });
};
|
176 |
+
|
177 |
+
const basicWolframAlphaSearch = (
|
178 |
+
query: string,
|
179 |
+
history: BaseMessage[],
|
180 |
+
llm: BaseChatModel,
|
181 |
+
) => {
|
182 |
+
const emitter = new eventEmitter();
|
183 |
+
|
184 |
+
try {
|
185 |
+
const basicWolframAlphaSearchAnsweringChain =
|
186 |
+
createBasicWolframAlphaSearchAnsweringChain(llm);
|
187 |
+
const stream = basicWolframAlphaSearchAnsweringChain.streamEvents(
|
188 |
+
{
|
189 |
+
chat_history: history,
|
190 |
+
query: query,
|
191 |
+
},
|
192 |
+
{
|
193 |
+
version: 'v1',
|
194 |
+
},
|
195 |
+
);
|
196 |
+
|
197 |
+
handleStream(stream, emitter);
|
198 |
+
} catch (err) {
|
199 |
+
emitter.emit(
|
200 |
+
'error',
|
201 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
202 |
+
);
|
203 |
+
logger.error(`Error in WolframAlphaSearch: ${err}`);
|
204 |
+
}
|
205 |
+
|
206 |
+
return emitter;
|
207 |
+
};
|
208 |
+
|
209 |
+
const handleWolframAlphaSearch = (
|
210 |
+
message: string,
|
211 |
+
history: BaseMessage[],
|
212 |
+
llm: BaseChatModel,
|
213 |
+
embeddings: Embeddings,
|
214 |
+
) => {
|
215 |
+
const emitter = basicWolframAlphaSearch(message, history, llm);
|
216 |
+
return emitter;
|
217 |
+
};
|
218 |
+
|
219 |
+
export default handleWolframAlphaSearch;
|
src/agents/writingAssistant.ts
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
ChatPromptTemplate,
|
4 |
+
MessagesPlaceholder,
|
5 |
+
} from '@langchain/core/prompts';
|
6 |
+
import { RunnableSequence } from '@langchain/core/runnables';
|
7 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
8 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
9 |
+
import eventEmitter from 'events';
|
10 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
11 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
12 |
+
import logger from '../utils/logger';
|
13 |
+
|
14 |
+
const writingAssistantPrompt = `
|
15 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
|
16 |
+
Since you are a writing assistant, you would not perform web searches. If you think you lack information to answer the query, you can ask the user for more information or suggest them to switch to a different focus mode.
|
17 |
+
`;
|
18 |
+
|
19 |
+
const strParser = new StringOutputParser();
|
20 |
+
|
21 |
+
const handleStream = async (
|
22 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
23 |
+
emitter: eventEmitter,
|
24 |
+
) => {
|
25 |
+
for await (const event of stream) {
|
26 |
+
if (
|
27 |
+
event.event === 'on_chain_stream' &&
|
28 |
+
event.name === 'FinalResponseGenerator'
|
29 |
+
) {
|
30 |
+
emitter.emit(
|
31 |
+
'data',
|
32 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
33 |
+
);
|
34 |
+
}
|
35 |
+
if (
|
36 |
+
event.event === 'on_chain_end' &&
|
37 |
+
event.name === 'FinalResponseGenerator'
|
38 |
+
) {
|
39 |
+
emitter.emit('end');
|
40 |
+
}
|
41 |
+
}
|
42 |
+
};
|
43 |
+
|
44 |
+
const createWritingAssistantChain = (llm: BaseChatModel) => {
|
45 |
+
return RunnableSequence.from([
|
46 |
+
ChatPromptTemplate.fromMessages([
|
47 |
+
['system', writingAssistantPrompt],
|
48 |
+
new MessagesPlaceholder('chat_history'),
|
49 |
+
['user', '{query}'],
|
50 |
+
]),
|
51 |
+
llm,
|
52 |
+
strParser,
|
53 |
+
]).withConfig({
|
54 |
+
runName: 'FinalResponseGenerator',
|
55 |
+
});
|
56 |
+
};
|
57 |
+
|
58 |
+
const handleWritingAssistant = (
|
59 |
+
query: string,
|
60 |
+
history: BaseMessage[],
|
61 |
+
llm: BaseChatModel,
|
62 |
+
embeddings: Embeddings,
|
63 |
+
) => {
|
64 |
+
const emitter = new eventEmitter();
|
65 |
+
|
66 |
+
try {
|
67 |
+
const writingAssistantChain = createWritingAssistantChain(llm);
|
68 |
+
const stream = writingAssistantChain.streamEvents(
|
69 |
+
{
|
70 |
+
chat_history: history,
|
71 |
+
query: query,
|
72 |
+
},
|
73 |
+
{
|
74 |
+
version: 'v1',
|
75 |
+
},
|
76 |
+
);
|
77 |
+
|
78 |
+
handleStream(stream, emitter);
|
79 |
+
} catch (err) {
|
80 |
+
emitter.emit(
|
81 |
+
'error',
|
82 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
83 |
+
);
|
84 |
+
logger.error(`Error in writing assistant: ${err}`);
|
85 |
+
}
|
86 |
+
|
87 |
+
return emitter;
|
88 |
+
};
|
89 |
+
|
90 |
+
export default handleWritingAssistant;
|
src/agents/youtubeSearchAgent.ts
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
import {
|
3 |
+
PromptTemplate,
|
4 |
+
ChatPromptTemplate,
|
5 |
+
MessagesPlaceholder,
|
6 |
+
} from '@langchain/core/prompts';
|
7 |
+
import {
|
8 |
+
RunnableSequence,
|
9 |
+
RunnableMap,
|
10 |
+
RunnableLambda,
|
11 |
+
} from '@langchain/core/runnables';
|
12 |
+
import { StringOutputParser } from '@langchain/core/output_parsers';
|
13 |
+
import { Document } from '@langchain/core/documents';
|
14 |
+
import { searchSearxng } from '../lib/searxng';
|
15 |
+
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
|
16 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
17 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
18 |
+
import formatChatHistoryAsString from '../utils/formatHistory';
|
19 |
+
import eventEmitter from 'events';
|
20 |
+
import computeSimilarity from '../utils/computeSimilarity';
|
21 |
+
import logger from '../utils/logger';
|
22 |
+
|
23 |
+
const basicYoutubeSearchRetrieverPrompt = `
|
24 |
+
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
|
25 |
+
If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
|
26 |
+
|
27 |
+
Example:
|
28 |
+
1. Follow up question: How does an A.C work?
|
29 |
+
Rephrased: A.C working
|
30 |
+
|
31 |
+
2. Follow up question: Linear algebra explanation video
|
32 |
+
Rephrased: What is linear algebra?
|
33 |
+
|
34 |
+
3. Follow up question: What is theory of relativity?
|
35 |
+
Rephrased: What is theory of relativity?
|
36 |
+
|
37 |
+
Conversation:
|
38 |
+
{chat_history}
|
39 |
+
|
40 |
+
Follow up question: {query}
|
41 |
+
Rephrased question:
|
42 |
+
`;
|
43 |
+
|
44 |
+
const basicYoutubeSearchResponsePrompt = `
|
45 |
+
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript.
|
46 |
+
|
47 |
+
Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page).
|
48 |
+
You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
|
49 |
+
You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
|
50 |
+
Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
|
51 |
+
You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
|
52 |
+
Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
|
53 |
+
However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
|
54 |
+
|
55 |
+
Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Youtube and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to
|
56 |
+
talk about the context in your response.
|
57 |
+
|
58 |
+
<context>
|
59 |
+
{context}
|
60 |
+
</context>
|
61 |
+
|
62 |
+
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
63 |
+
Anything between the \`context\` is retrieved from Youtube and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()}
|
64 |
+
`;
|
65 |
+
|
66 |
+
const strParser = new StringOutputParser();
|
67 |
+
|
68 |
+
const handleStream = async (
|
69 |
+
stream: AsyncGenerator<StreamEvent, any, unknown>,
|
70 |
+
emitter: eventEmitter,
|
71 |
+
) => {
|
72 |
+
for await (const event of stream) {
|
73 |
+
if (
|
74 |
+
event.event === 'on_chain_end' &&
|
75 |
+
event.name === 'FinalSourceRetriever'
|
76 |
+
) {
|
77 |
+
emitter.emit(
|
78 |
+
'data',
|
79 |
+
JSON.stringify({ type: 'sources', data: event.data.output }),
|
80 |
+
);
|
81 |
+
}
|
82 |
+
if (
|
83 |
+
event.event === 'on_chain_stream' &&
|
84 |
+
event.name === 'FinalResponseGenerator'
|
85 |
+
) {
|
86 |
+
emitter.emit(
|
87 |
+
'data',
|
88 |
+
JSON.stringify({ type: 'response', data: event.data.chunk }),
|
89 |
+
);
|
90 |
+
}
|
91 |
+
if (
|
92 |
+
event.event === 'on_chain_end' &&
|
93 |
+
event.name === 'FinalResponseGenerator'
|
94 |
+
) {
|
95 |
+
emitter.emit('end');
|
96 |
+
}
|
97 |
+
}
|
98 |
+
};
|
99 |
+
|
100 |
+
type BasicChainInput = {
|
101 |
+
chat_history: BaseMessage[];
|
102 |
+
query: string;
|
103 |
+
};
|
104 |
+
|
105 |
+
/**
 * Builds the retriever stage for the Youtube focus mode.
 *
 * Pipeline: rephrase the follow-up question into a standalone query (LLM +
 * retriever prompt), search SearxNG restricted to the 'youtube' engine, and
 * wrap each hit in a Document. Resolves to `{ query, docs }`, or
 * `{ query: '', docs: [] }` when the LLM returns the 'not_needed' sentinel.
 */
const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => {
  return RunnableSequence.from([
    PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt),
    llm,
    strParser,
    RunnableLambda.from(async (input: string) => {
      // Sentinel from the retriever prompt: no search should be performed.
      if (input === 'not_needed') {
        return { query: '', docs: [] };
      }

      const res = await searchSearxng(input, {
        language: 'en',
        engines: ['youtube'],
      });

      const documents = res.results.map(
        (result) =>
          new Document({
            // Youtube results may lack a description; fall back to the title
            // so the doc always has content for embedding/reranking.
            pageContent: result.content ? result.content : result.title,
            metadata: {
              title: result.title,
              url: result.url,
              // Thumbnail, when the engine provides one.
              ...(result.img_src && { img_src: result.img_src }),
            },
          }),
      );

      return { query: input, docs: documents };
    }),
  ]);
};
|
136 |
+
|
137 |
+
/**
 * Builds the full answering chain for the Youtube focus mode.
 *
 * Retrieved docs are reranked by embedding similarity against the rephrased
 * query before being rendered into the numbered context the response prompt
 * cites with [number] notation. Run names 'FinalSourceRetriever' and
 * 'FinalResponseGenerator' are what handleStream keys on.
 */
const createBasicYoutubeSearchAnsweringChain = (
  llm: BaseChatModel,
  embeddings: Embeddings,
) => {
  const basicYoutubeSearchRetrieverChain =
    createBasicYoutubeSearchRetrieverChain(llm);

  // Render docs as "1. <content>\n2. <content>..." to match the citation
  // numbering the response prompt describes.
  const processDocs = async (docs: Document[]) => {
    return docs
      .map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
      .join('\n');
  };

  // Rerank docs by cosine/dot similarity (see computeSimilarity) between the
  // query embedding and each doc embedding.
  const rerankDocs = async ({
    query,
    docs,
  }: {
    query: string;
    docs: Document[];
  }) => {
    if (docs.length === 0) {
      return docs;
    }

    // Drop docs with empty content; similarity indices below refer to this
    // filtered array, keeping embeddings and docs aligned.
    const docsWithContent = docs.filter(
      (doc) => doc.pageContent && doc.pageContent.length > 0,
    );

    // Embed all docs and the query in parallel.
    const [docEmbeddings, queryEmbedding] = await Promise.all([
      embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
      embeddings.embedQuery(query),
    ]);

    const similarity = docEmbeddings.map((docEmbedding, i) => {
      const sim = computeSimilarity(queryEmbedding, docEmbedding);

      return {
        index: i,
        similarity: sim,
      };
    });

    // Keep the 15 highest-similarity docs, then drop any of those scoring
    // <= 0.3 (note: threshold is applied after the top-15 cut).
    const sortedDocs = similarity
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, 15)
      .filter((sim) => sim.similarity > 0.3)
      .map((sim) => docsWithContent[sim.index]);

    return sortedDocs;
  };

  return RunnableSequence.from([
    RunnableMap.from({
      query: (input: BasicChainInput) => input.query,
      chat_history: (input: BasicChainInput) => input.chat_history,
      context: RunnableSequence.from([
        // The retriever prompt expects chat history as plain text.
        (input) => ({
          query: input.query,
          chat_history: formatChatHistoryAsString(input.chat_history),
        }),
        basicYoutubeSearchRetrieverChain
          .pipe(rerankDocs)
          .withConfig({
            // Name consumed by handleStream to emit the 'sources' payload.
            runName: 'FinalSourceRetriever',
          })
          .pipe(processDocs),
      ]),
    }),
    ChatPromptTemplate.fromMessages([
      ['system', basicYoutubeSearchResponsePrompt],
      new MessagesPlaceholder('chat_history'),
      ['user', '{query}'],
    ]),
    llm,
    strParser,
  ]).withConfig({
    // Name consumed by handleStream to stream the answer and signal 'end'.
    runName: 'FinalResponseGenerator',
  });
};
|
216 |
+
|
217 |
+
const basicYoutubeSearch = (
|
218 |
+
query: string,
|
219 |
+
history: BaseMessage[],
|
220 |
+
llm: BaseChatModel,
|
221 |
+
embeddings: Embeddings,
|
222 |
+
) => {
|
223 |
+
const emitter = new eventEmitter();
|
224 |
+
|
225 |
+
try {
|
226 |
+
const basicYoutubeSearchAnsweringChain =
|
227 |
+
createBasicYoutubeSearchAnsweringChain(llm, embeddings);
|
228 |
+
|
229 |
+
const stream = basicYoutubeSearchAnsweringChain.streamEvents(
|
230 |
+
{
|
231 |
+
chat_history: history,
|
232 |
+
query: query,
|
233 |
+
},
|
234 |
+
{
|
235 |
+
version: 'v1',
|
236 |
+
},
|
237 |
+
);
|
238 |
+
|
239 |
+
handleStream(stream, emitter);
|
240 |
+
} catch (err) {
|
241 |
+
emitter.emit(
|
242 |
+
'error',
|
243 |
+
JSON.stringify({ data: 'An error has occurred please try again later' }),
|
244 |
+
);
|
245 |
+
logger.error(`Error in youtube search: ${err}`);
|
246 |
+
}
|
247 |
+
|
248 |
+
return emitter;
|
249 |
+
};
|
250 |
+
|
251 |
+
const handleYoutubeSearch = (
|
252 |
+
message: string,
|
253 |
+
history: BaseMessage[],
|
254 |
+
llm: BaseChatModel,
|
255 |
+
embeddings: Embeddings,
|
256 |
+
) => {
|
257 |
+
const emitter = basicYoutubeSearch(message, history, llm, embeddings);
|
258 |
+
return emitter;
|
259 |
+
};
|
260 |
+
|
261 |
+
export default handleYoutubeSearch;
|
src/app.ts
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { startWebSocketServer } from './websocket';
|
2 |
+
import express from 'express';
|
3 |
+
import cors from 'cors';
|
4 |
+
import http from 'http';
|
5 |
+
import routes from './routes';
|
6 |
+
import { getPort } from './config';
|
7 |
+
import logger from './utils/logger';
|
8 |
+
|
9 |
+
// Server bootstrap: HTTP API + websocket server sharing one http.Server.

// Port comes from config.toml (GENERAL.PORT).
const port = getPort();

const app = express();
// A raw http.Server is created (rather than app.listen) so the websocket
// server below can attach to the same listener.
const server = http.createServer(app);

// NOTE(review): wide-open CORS ('*') — acceptable for a self-hosted
// deployment, but worth confirming for anything internet-facing.
const corsOptions = {
  origin: '*',
};

app.use(cors(corsOptions));
app.use(express.json());

// All REST endpoints are mounted under /api.
app.use('/api', routes);
// Simple health-check endpoint.
app.get('/api', (_, res) => {
  res.status(200).json({ status: 'ok' });
});

server.listen(port, () => {
  logger.info(`Server is running on port ${port}`);
});

// Attach the websocket handler to the same HTTP server.
startWebSocketServer(server);
|
src/config.ts
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fs from 'fs';
|
2 |
+
import path from 'path';
|
3 |
+
import toml from '@iarna/toml';
|
4 |
+
|
5 |
+
const configFileName = 'config.toml';
|
6 |
+
|
7 |
+
/** Shape of config.toml as parsed by @iarna/toml. */
interface Config {
  GENERAL: {
    // HTTP/websocket listen port.
    PORT: number;
    // Similarity metric name used when reranking (e.g. cosine/dot);
    // consumed by computeSimilarity.
    SIMILARITY_MEASURE: string;
  };
  API_KEYS: {
    OPENAI: string;
    GROQ: string;
  };
  API_ENDPOINTS: {
    // Base URL of the SearxNG instance.
    SEARXNG: string;
    // Base URL of the Ollama server (empty disables Ollama providers).
    OLLAMA: string;
  };
}

/** Deep Partial<T>: every field at every nesting level becomes optional. */
type RecursivePartial<T> = {
  [P in keyof T]?: RecursivePartial<T[P]>;
};
|
25 |
+
|
26 |
+
const loadConfig = () =>
|
27 |
+
toml.parse(
|
28 |
+
fs.readFileSync(path.join(__dirname, `../${configFileName}`), 'utf-8'),
|
29 |
+
) as any as Config;
|
30 |
+
|
31 |
+
export const getPort = () => loadConfig().GENERAL.PORT;
|
32 |
+
|
33 |
+
export const getSimilarityMeasure = () =>
|
34 |
+
loadConfig().GENERAL.SIMILARITY_MEASURE;
|
35 |
+
|
36 |
+
export const getOpenaiApiKey = () => loadConfig().API_KEYS.OPENAI;
|
37 |
+
|
38 |
+
export const getGroqApiKey = () => loadConfig().API_KEYS.GROQ;
|
39 |
+
|
40 |
+
export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG;
|
41 |
+
|
42 |
+
export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA;
|
43 |
+
|
44 |
+
/**
 * Merges the current on-disk configuration into `config` and writes the
 * result back to config.toml.
 *
 * Merge semantics per nested key:
 *  - a truthy value in `config` wins;
 *  - a missing/undefined value falls back to the current on-disk value;
 *  - an explicit empty string ('') in `config` is kept, i.e. it clears the
 *    key (the `!== ''` guard prevents the fallback from overwriting it).
 *
 * NOTE(review): this mutates the caller's `config` argument in place while
 * merging — callers must not expect the object to be unchanged afterwards.
 */
export const updateConfig = (config: RecursivePartial<Config>) => {
  const currentConfig = loadConfig();

  for (const key in currentConfig) {
    // Ensure the section exists on the incoming partial before merging.
    if (!config[key]) config[key] = {};

    if (typeof currentConfig[key] === 'object' && currentConfig[key] !== null) {
      for (const nestedKey in currentConfig[key]) {
        if (
          !config[key][nestedKey] &&
          currentConfig[key][nestedKey] &&
          config[key][nestedKey] !== ''
        ) {
          // Fall back to the existing on-disk value.
          config[key][nestedKey] = currentConfig[key][nestedKey];
        }
      }
    } else if (currentConfig[key] && config[key] !== '') {
      // Same fallback rule for (hypothetical) scalar top-level keys.
      config[key] = currentConfig[key];
    }
  }

  // Persist the merged result back to config.toml.
  fs.writeFileSync(
    path.join(__dirname, `../${configFileName}`),
    toml.stringify(config),
  );
};
|
src/lib/providers.ts
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
2 |
+
import { ChatOllama } from '@langchain/community/chat_models/ollama';
|
3 |
+
import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
|
4 |
+
import {
|
5 |
+
getGroqApiKey,
|
6 |
+
getOllamaApiEndpoint,
|
7 |
+
getOpenaiApiKey,
|
8 |
+
} from '../config';
|
9 |
+
import logger from '../utils/logger';
|
10 |
+
|
11 |
+
/**
 * Discovers the chat-model providers usable with the current configuration.
 *
 * Returns a two-level map: provider id ('openai' | 'groq' | 'ollama' |
 * 'custom_openai') -> display name -> LangChain chat-model instance.
 * Providers whose key/endpoint is unset are omitted; per-provider failures
 * are logged and skipped so one bad provider does not hide the others.
 * 'custom_openai' is always present (empty) as a placeholder the UI fills
 * with user-supplied endpoint/key.
 */
export const getAvailableChatModelProviders = async () => {
  const openAIApiKey = getOpenaiApiKey();
  const groqApiKey = getGroqApiKey();
  const ollamaEndpoint = getOllamaApiEndpoint();

  const models = {};

  if (openAIApiKey) {
    try {
      models['openai'] = {
        'GPT-3.5 turbo': new ChatOpenAI({
          openAIApiKey,
          modelName: 'gpt-3.5-turbo',
          temperature: 0.7,
        }),
        'GPT-4': new ChatOpenAI({
          openAIApiKey,
          modelName: 'gpt-4',
          temperature: 0.7,
        }),
        'GPT-4 turbo': new ChatOpenAI({
          openAIApiKey,
          modelName: 'gpt-4-turbo',
          temperature: 0.7,
        }),
      };
    } catch (err) {
      logger.error(`Error loading OpenAI models: ${err}`);
    }
  }

  if (groqApiKey) {
    try {
      // Groq exposes an OpenAI-compatible API, so ChatOpenAI is reused with
      // its baseURL pointed at the Groq endpoint.
      models['groq'] = {
        'LLaMA3 8b': new ChatOpenAI(
          {
            openAIApiKey: groqApiKey,
            modelName: 'llama3-8b-8192',
            temperature: 0.7,
          },
          {
            baseURL: 'https://api.groq.com/openai/v1',
          },
        ),
        'LLaMA3 70b': new ChatOpenAI(
          {
            openAIApiKey: groqApiKey,
            modelName: 'llama3-70b-8192',
            temperature: 0.7,
          },
          {
            baseURL: 'https://api.groq.com/openai/v1',
          },
        ),
        'Mixtral 8x7b': new ChatOpenAI(
          {
            openAIApiKey: groqApiKey,
            modelName: 'mixtral-8x7b-32768',
            temperature: 0.7,
          },
          {
            baseURL: 'https://api.groq.com/openai/v1',
          },
        ),
        'Gemma 7b': new ChatOpenAI(
          {
            openAIApiKey: groqApiKey,
            modelName: 'gemma-7b-it',
            temperature: 0.7,
          },
          {
            baseURL: 'https://api.groq.com/openai/v1',
          },
        ),
      };
    } catch (err) {
      logger.error(`Error loading Groq models: ${err}`);
    }
  }

  if (ollamaEndpoint) {
    try {
      // Ollama's /api/tags lists the locally installed models; each becomes
      // a ChatOllama entry keyed by its model id.
      const response = await fetch(`${ollamaEndpoint}/api/tags`);

      const { models: ollamaModels } = (await response.json()) as any;

      models['ollama'] = ollamaModels.reduce((acc, model) => {
        acc[model.model] = new ChatOllama({
          baseUrl: ollamaEndpoint,
          model: model.model,
          temperature: 0.7,
        });
        return acc;
      }, {});
    } catch (err) {
      logger.error(`Error loading Ollama models: ${err}`);
    }
  }

  // Placeholder provider: the client supplies endpoint/key at request time.
  models['custom_openai'] = {};

  return models;
};
|
114 |
+
|
115 |
+
/**
 * Discovers the embedding-model providers usable with the current
 * configuration.
 *
 * Returns provider id ('openai' | 'ollama') -> display name -> LangChain
 * embeddings instance. Unset providers are omitted; per-provider failures
 * are logged and skipped.
 */
export const getAvailableEmbeddingModelProviders = async () => {
  const openAIApiKey = getOpenaiApiKey();
  const ollamaEndpoint = getOllamaApiEndpoint();

  const models = {};

  if (openAIApiKey) {
    try {
      models['openai'] = {
        'Text embedding 3 small': new OpenAIEmbeddings({
          openAIApiKey,
          modelName: 'text-embedding-3-small',
        }),
        'Text embedding 3 large': new OpenAIEmbeddings({
          openAIApiKey,
          modelName: 'text-embedding-3-large',
        }),
      };
    } catch (err) {
      logger.error(`Error loading OpenAI embeddings: ${err}`);
    }
  }

  if (ollamaEndpoint) {
    try {
      // Every locally installed Ollama model is offered for embeddings.
      // NOTE(review): /api/tags does not distinguish chat from embedding
      // models, so non-embedding models appear here too — confirm intended.
      const response = await fetch(`${ollamaEndpoint}/api/tags`);

      const { models: ollamaModels } = (await response.json()) as any;

      models['ollama'] = ollamaModels.reduce((acc, model) => {
        acc[model.model] = new OllamaEmbeddings({
          baseUrl: ollamaEndpoint,
          model: model.model,
        });
        return acc;
      }, {});
    } catch (err) {
      logger.error(`Error loading Ollama embeddings: ${err}`);
    }
  }

  return models;
};
|
src/lib/searxng.ts
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import axios from 'axios';
|
2 |
+
import { getSearxngApiEndpoint } from '../config';
|
3 |
+
|
4 |
+
/** Query options forwarded to SearxNG as URL parameters. */
interface SearxngSearchOptions {
  // SearxNG categories (e.g. 'images', 'videos'); joined with ','.
  categories?: string[];
  // Specific engines to query (e.g. 'wolframalpha', 'youtube'); joined with ','.
  engines?: string[];
  // Result language code, e.g. 'en'.
  language?: string;
  // 1-based result page number.
  pageno?: number;
}

/** One result entry from SearxNG's JSON response. */
interface SearxngSearchResult {
  title: string;
  url: string;
  // Optional media fields; presence depends on the engine that produced
  // the result.
  img_src?: string;
  thumbnail_src?: string;
  thumbnail?: string;
  // Text snippet/description; may be absent for media results.
  content?: string;
  author?: string;
  // Embeddable player URL (e.g. for video results).
  iframe_src?: string;
}
|
21 |
+
|
22 |
+
export const searchSearxng = async (
|
23 |
+
query: string,
|
24 |
+
opts?: SearxngSearchOptions,
|
25 |
+
) => {
|
26 |
+
const searxngURL = getSearxngApiEndpoint();
|
27 |
+
|
28 |
+
const url = new URL(`${searxngURL}/search?format=json`);
|
29 |
+
url.searchParams.append('q', query);
|
30 |
+
|
31 |
+
if (opts) {
|
32 |
+
Object.keys(opts).forEach((key) => {
|
33 |
+
if (Array.isArray(opts[key])) {
|
34 |
+
url.searchParams.append(key, opts[key].join(','));
|
35 |
+
return;
|
36 |
+
}
|
37 |
+
url.searchParams.append(key, opts[key]);
|
38 |
+
});
|
39 |
+
}
|
40 |
+
|
41 |
+
const res = await axios.get(url.toString());
|
42 |
+
|
43 |
+
const results: SearxngSearchResult[] = res.data.results;
|
44 |
+
const suggestions: string[] = res.data.suggestions;
|
45 |
+
|
46 |
+
return { results, suggestions };
|
47 |
+
};
|
src/routes/config.ts
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import express from 'express';
|
2 |
+
import {
|
3 |
+
getAvailableChatModelProviders,
|
4 |
+
getAvailableEmbeddingModelProviders,
|
5 |
+
} from '../lib/providers';
|
6 |
+
import {
|
7 |
+
getGroqApiKey,
|
8 |
+
getOllamaApiEndpoint,
|
9 |
+
getOpenaiApiKey,
|
10 |
+
updateConfig,
|
11 |
+
} from '../config';
|
12 |
+
|
13 |
+
const router = express.Router();
|
14 |
+
|
15 |
+
router.get('/', async (_, res) => {
|
16 |
+
const config = {};
|
17 |
+
|
18 |
+
const [chatModelProviders, embeddingModelProviders] = await Promise.all([
|
19 |
+
getAvailableChatModelProviders(),
|
20 |
+
getAvailableEmbeddingModelProviders(),
|
21 |
+
]);
|
22 |
+
|
23 |
+
config['chatModelProviders'] = {};
|
24 |
+
config['embeddingModelProviders'] = {};
|
25 |
+
|
26 |
+
for (const provider in chatModelProviders) {
|
27 |
+
config['chatModelProviders'][provider] = Object.keys(
|
28 |
+
chatModelProviders[provider],
|
29 |
+
);
|
30 |
+
}
|
31 |
+
|
32 |
+
for (const provider in embeddingModelProviders) {
|
33 |
+
config['embeddingModelProviders'][provider] = Object.keys(
|
34 |
+
embeddingModelProviders[provider],
|
35 |
+
);
|
36 |
+
}
|
37 |
+
|
38 |
+
config['openaiApiKey'] = getOpenaiApiKey();
|
39 |
+
config['ollamaApiUrl'] = getOllamaApiEndpoint();
|
40 |
+
config['groqApiKey'] = getGroqApiKey();
|
41 |
+
|
42 |
+
res.status(200).json(config);
|
43 |
+
});
|
44 |
+
|
45 |
+
router.post('/', async (req, res) => {
|
46 |
+
const config = req.body;
|
47 |
+
|
48 |
+
const updatedConfig = {
|
49 |
+
API_KEYS: {
|
50 |
+
OPENAI: config.openaiApiKey,
|
51 |
+
GROQ: config.groqApiKey,
|
52 |
+
},
|
53 |
+
API_ENDPOINTS: {
|
54 |
+
OLLAMA: config.ollamaApiUrl,
|
55 |
+
},
|
56 |
+
};
|
57 |
+
|
58 |
+
updateConfig(updatedConfig);
|
59 |
+
|
60 |
+
res.status(200).json({ message: 'Config updated' });
|
61 |
+
});
|
62 |
+
|
63 |
+
export default router;
|
src/routes/images.ts
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import express from 'express';
|
2 |
+
import handleImageSearch from '../agents/imageSearchAgent';
|
3 |
+
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
4 |
+
import { getAvailableChatModelProviders } from '../lib/providers';
|
5 |
+
import { HumanMessage, AIMessage } from '@langchain/core/messages';
|
6 |
+
import logger from '../utils/logger';
|
7 |
+
|
8 |
+
const router = express.Router();
|
9 |
+
|
10 |
+
router.post('/', async (req, res) => {
|
11 |
+
try {
|
12 |
+
let { query, chat_history, chat_model_provider, chat_model } = req.body;
|
13 |
+
|
14 |
+
chat_history = chat_history.map((msg: any) => {
|
15 |
+
if (msg.role === 'user') {
|
16 |
+
return new HumanMessage(msg.content);
|
17 |
+
} else if (msg.role === 'assistant') {
|
18 |
+
return new AIMessage(msg.content);
|
19 |
+
}
|
20 |
+
});
|
21 |
+
|
22 |
+
const chatModels = await getAvailableChatModelProviders();
|
23 |
+
const provider = chat_model_provider || Object.keys(chatModels)[0];
|
24 |
+
const chatModel = chat_model || Object.keys(chatModels[provider])[0];
|
25 |
+
|
26 |
+
let llm: BaseChatModel | undefined;
|
27 |
+
|
28 |
+
if (chatModels[provider] && chatModels[provider][chatModel]) {
|
29 |
+
llm = chatModels[provider][chatModel] as BaseChatModel | undefined;
|
30 |
+
}
|
31 |
+
|
32 |
+
if (!llm) {
|
33 |
+
res.status(500).json({ message: 'Invalid LLM model selected' });
|
34 |
+
return;
|
35 |
+
}
|
36 |
+
|
37 |
+
const images = await handleImageSearch({ query, chat_history }, llm);
|
38 |
+
|
39 |
+
res.status(200).json({ images });
|
40 |
+
} catch (err) {
|
41 |
+
res.status(500).json({ message: 'An error has occurred.' });
|
42 |
+
logger.error(`Error in image search: ${err.message}`);
|
43 |
+
}
|
44 |
+
});
|
45 |
+
|
46 |
+
export default router;
|
src/routes/index.ts
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import express from 'express';
import imagesRouter from './images';
import videosRouter from './videos';
import configRouter from './config';
import modelsRouter from './models';

// Top-level API router: mounts each feature router under its own prefix.
const router = express.Router();

router.use('/images', imagesRouter); // image search
router.use('/videos', videosRouter); // video search
router.use('/config', configRouter); // read/update app configuration
router.use('/models', modelsRouter); // list available chat/embedding models

export default router;
|
src/routes/models.ts
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import express from 'express';
|
2 |
+
import logger from '../utils/logger';
|
3 |
+
import {
|
4 |
+
getAvailableChatModelProviders,
|
5 |
+
getAvailableEmbeddingModelProviders,
|
6 |
+
} from '../lib/providers';
|
7 |
+
|
8 |
+
const router = express.Router();
|
9 |
+
|
10 |
+
router.get('/', async (req, res) => {
|
11 |
+
try {
|
12 |
+
const [chatModelProviders, embeddingModelProviders] = await Promise.all([
|
13 |
+
getAvailableChatModelProviders(),
|
14 |
+
getAvailableEmbeddingModelProviders(),
|
15 |
+
]);
|
16 |
+
|
17 |
+
res.status(200).json({ chatModelProviders, embeddingModelProviders });
|
18 |
+
} catch (err) {
|
19 |
+
res.status(500).json({ message: 'An error has occurred.' });
|
20 |
+
logger.error(err.message);
|
21 |
+
}
|
22 |
+
});
|
23 |
+
|
24 |
+
export default router;
|
src/routes/videos.ts
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import express from 'express';
|
2 |
+
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
3 |
+
import { getAvailableChatModelProviders } from '../lib/providers';
|
4 |
+
import { HumanMessage, AIMessage } from '@langchain/core/messages';
|
5 |
+
import logger from '../utils/logger';
|
6 |
+
import handleVideoSearch from '../agents/videoSearchAgent';
|
7 |
+
|
8 |
+
const router = express.Router();
|
9 |
+
|
10 |
+
router.post('/', async (req, res) => {
|
11 |
+
try {
|
12 |
+
let { query, chat_history, chat_model_provider, chat_model } = req.body;
|
13 |
+
|
14 |
+
chat_history = chat_history.map((msg: any) => {
|
15 |
+
if (msg.role === 'user') {
|
16 |
+
return new HumanMessage(msg.content);
|
17 |
+
} else if (msg.role === 'assistant') {
|
18 |
+
return new AIMessage(msg.content);
|
19 |
+
}
|
20 |
+
});
|
21 |
+
|
22 |
+
const chatModels = await getAvailableChatModelProviders();
|
23 |
+
const provider = chat_model_provider || Object.keys(chatModels)[0];
|
24 |
+
const chatModel = chat_model || Object.keys(chatModels[provider])[0];
|
25 |
+
|
26 |
+
let llm: BaseChatModel | undefined;
|
27 |
+
|
28 |
+
if (chatModels[provider] && chatModels[provider][chatModel]) {
|
29 |
+
llm = chatModels[provider][chatModel] as BaseChatModel | undefined;
|
30 |
+
}
|
31 |
+
|
32 |
+
if (!llm) {
|
33 |
+
res.status(500).json({ message: 'Invalid LLM model selected' });
|
34 |
+
return;
|
35 |
+
}
|
36 |
+
|
37 |
+
const videos = await handleVideoSearch({ chat_history, query }, llm);
|
38 |
+
|
39 |
+
res.status(200).json({ videos });
|
40 |
+
} catch (err) {
|
41 |
+
res.status(500).json({ message: 'An error has occurred.' });
|
42 |
+
logger.error(`Error in video search: ${err.message}`);
|
43 |
+
}
|
44 |
+
});
|
45 |
+
|
46 |
+
export default router;
|
src/utils/computeSimilarity.ts
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dot from 'compute-dot';
|
2 |
+
import cosineSimilarity from 'compute-cosine-similarity';
|
3 |
+
import { getSimilarityMeasure } from '../config';
|
4 |
+
|
5 |
+
const computeSimilarity = (x: number[], y: number[]): number => {
|
6 |
+
const similarityMeasure = getSimilarityMeasure();
|
7 |
+
|
8 |
+
if (similarityMeasure === 'cosine') {
|
9 |
+
return cosineSimilarity(x, y);
|
10 |
+
} else if (similarityMeasure === 'dot') {
|
11 |
+
return dot(x, y);
|
12 |
+
}
|
13 |
+
|
14 |
+
throw new Error('Invalid similarity measure');
|
15 |
+
};
|
16 |
+
|
17 |
+
export default computeSimilarity;
|
src/utils/formatHistory.ts
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BaseMessage } from '@langchain/core/messages';
|
2 |
+
|
3 |
+
const formatChatHistoryAsString = (history: BaseMessage[]) => {
|
4 |
+
return history
|
5 |
+
.map((message) => `${message._getType()}: ${message.content}`)
|
6 |
+
.join('\n');
|
7 |
+
};
|
8 |
+
|
9 |
+
export default formatChatHistoryAsString;
|
src/utils/logger.ts
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import winston from 'winston';

// Application-wide logger.
// - Console transport: colorized, human-readable output for development.
// - File transport: timestamped JSON lines appended to app.log.
// Level 'info' means logger.debug(...) calls are suppressed by default.
const logger = winston.createLogger({
  level: 'info',
  transports: [
    new winston.transports.Console({
      format: winston.format.combine(
        winston.format.colorize(),
        winston.format.simple(),
      ),
    }),
    new winston.transports.File({
      filename: 'app.log',
      format: winston.format.combine(
        winston.format.timestamp(),
        winston.format.json(),
      ),
    }),
  ],
});

export default logger;
|
src/websocket/connectionManager.ts
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { WebSocket } from 'ws';
|
2 |
+
import { handleMessage } from './messageHandler';
|
3 |
+
import {
|
4 |
+
getAvailableEmbeddingModelProviders,
|
5 |
+
getAvailableChatModelProviders,
|
6 |
+
} from '../lib/providers';
|
7 |
+
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
8 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
9 |
+
import type { IncomingMessage } from 'http';
|
10 |
+
import logger from '../utils/logger';
|
11 |
+
import { ChatOpenAI } from '@langchain/openai';
|
12 |
+
|
13 |
+
export const handleConnection = async (
|
14 |
+
ws: WebSocket,
|
15 |
+
request: IncomingMessage,
|
16 |
+
) => {
|
17 |
+
const searchParams = new URL(request.url, `http://${request.headers.host}`)
|
18 |
+
.searchParams;
|
19 |
+
|
20 |
+
const [chatModelProviders, embeddingModelProviders] = await Promise.all([
|
21 |
+
getAvailableChatModelProviders(),
|
22 |
+
getAvailableEmbeddingModelProviders(),
|
23 |
+
]);
|
24 |
+
|
25 |
+
const chatModelProvider =
|
26 |
+
searchParams.get('chatModelProvider') || Object.keys(chatModelProviders)[0];
|
27 |
+
const chatModel =
|
28 |
+
searchParams.get('chatModel') ||
|
29 |
+
Object.keys(chatModelProviders[chatModelProvider])[0];
|
30 |
+
|
31 |
+
const embeddingModelProvider =
|
32 |
+
searchParams.get('embeddingModelProvider') ||
|
33 |
+
Object.keys(embeddingModelProviders)[0];
|
34 |
+
const embeddingModel =
|
35 |
+
searchParams.get('embeddingModel') ||
|
36 |
+
Object.keys(embeddingModelProviders[embeddingModelProvider])[0];
|
37 |
+
|
38 |
+
let llm: BaseChatModel | undefined;
|
39 |
+
let embeddings: Embeddings | undefined;
|
40 |
+
|
41 |
+
if (
|
42 |
+
chatModelProviders[chatModelProvider] &&
|
43 |
+
chatModelProviders[chatModelProvider][chatModel] &&
|
44 |
+
chatModelProvider != 'custom_openai'
|
45 |
+
) {
|
46 |
+
llm = chatModelProviders[chatModelProvider][chatModel] as
|
47 |
+
| BaseChatModel
|
48 |
+
| undefined;
|
49 |
+
} else if (chatModelProvider == 'custom_openai') {
|
50 |
+
llm = new ChatOpenAI({
|
51 |
+
modelName: chatModel,
|
52 |
+
openAIApiKey: searchParams.get('openAIApiKey'),
|
53 |
+
temperature: 0.7,
|
54 |
+
configuration: {
|
55 |
+
baseURL: searchParams.get('openAIBaseURL'),
|
56 |
+
},
|
57 |
+
});
|
58 |
+
}
|
59 |
+
|
60 |
+
if (
|
61 |
+
embeddingModelProviders[embeddingModelProvider] &&
|
62 |
+
embeddingModelProviders[embeddingModelProvider][embeddingModel]
|
63 |
+
) {
|
64 |
+
embeddings = embeddingModelProviders[embeddingModelProvider][
|
65 |
+
embeddingModel
|
66 |
+
] as Embeddings | undefined;
|
67 |
+
}
|
68 |
+
|
69 |
+
if (!llm || !embeddings) {
|
70 |
+
ws.send(
|
71 |
+
JSON.stringify({
|
72 |
+
type: 'error',
|
73 |
+
data: 'Invalid LLM or embeddings model selected',
|
74 |
+
}),
|
75 |
+
);
|
76 |
+
ws.close();
|
77 |
+
}
|
78 |
+
|
79 |
+
ws.on(
|
80 |
+
'message',
|
81 |
+
async (message) =>
|
82 |
+
await handleMessage(message.toString(), ws, llm, embeddings),
|
83 |
+
);
|
84 |
+
|
85 |
+
ws.on('close', () => logger.debug('Connection closed'));
|
86 |
+
};
|
src/websocket/index.ts
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { initServer } from './websocketServer';
import http from 'http';

/**
 * Attaches the WebSocket server to an existing HTTP server so both share a
 * single port. Thin wrapper around initServer.
 */
export const startWebSocketServer = (
  server: http.Server<typeof http.IncomingMessage, typeof http.ServerResponse>,
) => {
  initServer(server);
};
|
src/websocket/messageHandler.ts
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { EventEmitter, WebSocket } from 'ws';
|
2 |
+
import { BaseMessage, AIMessage, HumanMessage } from '@langchain/core/messages';
|
3 |
+
import handleWebSearch from '../agents/webSearchAgent';
|
4 |
+
import handleAcademicSearch from '../agents/academicSearchAgent';
|
5 |
+
import handleWritingAssistant from '../agents/writingAssistant';
|
6 |
+
import handleWolframAlphaSearch from '../agents/wolframAlphaSearchAgent';
|
7 |
+
import handleYoutubeSearch from '../agents/youtubeSearchAgent';
|
8 |
+
import handleRedditSearch from '../agents/redditSearchAgent';
|
9 |
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
10 |
+
import type { Embeddings } from '@langchain/core/embeddings';
|
11 |
+
import logger from '../utils/logger';
|
12 |
+
|
13 |
+
// Wire format of an incoming client WebSocket message.
type Message = {
  type: string; // only 'message' is dispatched by handleMessage below
  content: string; // the user's query text
  copilot: boolean;
  focusMode: string; // selects an entry of searchHandlers
  history: Array<[string, string]>; // [role, content]; 'human' maps to HumanMessage, anything else to AIMessage
};

// Maps a focus-mode name (sent by the client) to its search agent.
const searchHandlers = {
  webSearch: handleWebSearch,
  academicSearch: handleAcademicSearch,
  writingAssistant: handleWritingAssistant,
  wolframAlphaSearch: handleWolframAlphaSearch,
  youtubeSearch: handleYoutubeSearch,
  redditSearch: handleRedditSearch,
};
|
29 |
+
|
30 |
+
const handleEmitterEvents = (
|
31 |
+
emitter: EventEmitter,
|
32 |
+
ws: WebSocket,
|
33 |
+
id: string,
|
34 |
+
) => {
|
35 |
+
emitter.on('data', (data) => {
|
36 |
+
const parsedData = JSON.parse(data);
|
37 |
+
if (parsedData.type === 'response') {
|
38 |
+
ws.send(
|
39 |
+
JSON.stringify({
|
40 |
+
type: 'message',
|
41 |
+
data: parsedData.data,
|
42 |
+
messageId: id,
|
43 |
+
}),
|
44 |
+
);
|
45 |
+
} else if (parsedData.type === 'sources') {
|
46 |
+
ws.send(
|
47 |
+
JSON.stringify({
|
48 |
+
type: 'sources',
|
49 |
+
data: parsedData.data,
|
50 |
+
messageId: id,
|
51 |
+
}),
|
52 |
+
);
|
53 |
+
}
|
54 |
+
});
|
55 |
+
emitter.on('end', () => {
|
56 |
+
ws.send(JSON.stringify({ type: 'messageEnd', messageId: id }));
|
57 |
+
});
|
58 |
+
emitter.on('error', (data) => {
|
59 |
+
const parsedData = JSON.parse(data);
|
60 |
+
ws.send(JSON.stringify({ type: 'error', data: parsedData.data }));
|
61 |
+
});
|
62 |
+
};
|
63 |
+
|
64 |
+
/**
 * Entry point for every WebSocket message received from a client.
 *
 * Parses the raw message, rebuilds the chat history as LangChain messages,
 * and dispatches to the search handler selected by `focusMode`. The
 * handler's streamed output is relayed back over `ws` by
 * handleEmitterEvents. Any parse/dispatch failure is reported to the client
 * as an error frame and logged.
 */
export const handleMessage = async (
  message: string,
  ws: WebSocket,
  llm: BaseChatModel,
  embeddings: Embeddings,
) => {
  try {
    const parsedMessage = JSON.parse(message) as Message;
    // Short random id used to correlate streamed chunks on the client.
    // NOTE(review): Math.random is not collision-proof; fine for UI
    // correlation, but do not rely on it for anything security-sensitive.
    const id = Math.random().toString(36).substring(7);

    if (!parsedMessage.content)
      return ws.send(
        JSON.stringify({ type: 'error', data: 'Invalid message format' }),
      );

    // History arrives as [role, content] tuples; 'human' becomes a
    // HumanMessage, any other role an AIMessage.
    const history: BaseMessage[] = parsedMessage.history.map((msg) => {
      if (msg[0] === 'human') {
        return new HumanMessage({
          content: msg[1],
        });
      } else {
        return new AIMessage({
          content: msg[1],
        });
      }
    });

    if (parsedMessage.type === 'message') {
      const handler = searchHandlers[parsedMessage.focusMode];
      if (handler) {
        const emitter = handler(
          parsedMessage.content,
          history,
          llm,
          embeddings,
        );
        handleEmitterEvents(emitter, ws, id);
      } else {
        ws.send(JSON.stringify({ type: 'error', data: 'Invalid focus mode' }));
      }
    }
    // Messages with any other `type` are silently ignored.
  } catch (err) {
    ws.send(JSON.stringify({ type: 'error', data: 'Invalid message format' }));
    logger.error(`Failed to handle message: ${err}`);
  }
};
|
src/websocket/websocketServer.ts
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { WebSocketServer } from 'ws';
|
2 |
+
import { handleConnection } from './connectionManager';
|
3 |
+
import http from 'http';
|
4 |
+
import { getPort } from '../config';
|
5 |
+
import logger from '../utils/logger';
|
6 |
+
|
7 |
+
/**
 * Creates a WebSocketServer that shares the given HTTP server's socket and
 * delegates every new connection to handleConnection.
 */
export const initServer = (
  server: http.Server<typeof http.IncomingMessage, typeof http.ServerResponse>,
) => {
  // The port is read only for the log line; binding is inherited from
  // `server` via the { server } option.
  const port = getPort();
  const wss = new WebSocketServer({ server });

  wss.on('connection', handleConnection);

  logger.info(`WebSocket server started on port ${port}`);
};
|
tsconfig.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"compilerOptions": {
|
3 |
+
"lib": ["ESNext"],
|
4 |
+
"module": "commonjs",
|
5 |
+
"target": "ESNext",
|
6 |
+
"outDir": "dist",
|
7 |
+
"sourceMap": false,
|
8 |
+
"esModuleInterop": true,
|
9 |
+
"experimentalDecorators": true,
|
10 |
+
"emitDecoratorMetadata": true,
|
11 |
+
"allowSyntheticDefaultImports": true,
|
12 |
+
"skipLibCheck": true,
|
13 |
+
"skipDefaultLibCheck": true
|
14 |
+
},
|
15 |
+
"include": ["src"],
|
16 |
+
"exclude": ["node_modules", "**/*.spec.ts"]
|
17 |
+
}
|
ui/.env.example
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
NEXT_PUBLIC_WS_URL=ws://localhost:3001
|
2 |
+
NEXT_PUBLIC_API_URL=http://localhost:3001/api
|
ui/.eslintrc.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"extends": "next/core-web-vitals"
|
3 |
+
}
|
ui/.gitignore
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# dependencies
|
2 |
+
/node_modules
|
3 |
+
/.pnp
|
4 |
+
.pnp.js
|
5 |
+
.yarn/install-state.gz
|
6 |
+
|
7 |
+
# testing
|
8 |
+
/coverage
|
9 |
+
|
10 |
+
# next.js
|
11 |
+
/.next/
|
12 |
+
/out/
|
13 |
+
|
14 |
+
# production
|
15 |
+
/build
|
16 |
+
|
17 |
+
# misc
|
18 |
+
.DS_Store
|
19 |
+
*.pem
|
20 |
+
|
21 |
+
# debug
|
22 |
+
npm-debug.log*
|
23 |
+
yarn-debug.log*
|
24 |
+
yarn-error.log*
|
25 |
+
|
26 |
+
# local env files
|
27 |
+
.env*.local
|
28 |
+
|
29 |
+
# vercel
|
30 |
+
.vercel
|
31 |
+
|
32 |
+
# typescript
|
33 |
+
*.tsbuildinfo
|
34 |
+
next-env.d.ts
|
ui/.prettierrc.js
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/** @type {import("prettier").Config} */
|
2 |
+
|
3 |
+
const config = {
|
4 |
+
printWidth: 80,
|
5 |
+
trailingComma: 'all',
|
6 |
+
endOfLine: 'auto',
|
7 |
+
singleQuote: true,
|
8 |
+
tabWidth: 2,
|
9 |
+
};
|
10 |
+
|
11 |
+
module.exports = config;
|
ui/app/discover/page.tsx
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Placeholder Discover page: currently renders only static text.
const Page = () => {
  return <div>page</div>;
};

export default Page;
|