Spaces:
Runtime error
Runtime error
File size: 5,366 Bytes
2d3a993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import time, aiohttp, asyncio, json, os, multiprocessing
from minivectordb.embedding_model import EmbeddingModel
from minivectordb.vector_database import VectorDatabase
from text_util_en_pt.cleaner import structurize_text, detect_language, Language
from webtextcrawler.webtextcrawler import extract_text_from_url
from duckduckgo_search import DDGS
import gradio as gr
# OpenRouter API key, read from the environment; None when the variable is unset.
openrouter_key = os.getenv("OPENROUTER_KEY")

# Single embedding model instance shared by every request in this module.
model = EmbeddingModel(e5_model_size='small', use_quantized_onnx_model=False)
def fetch_links(query, max_results=10):
    """Run a DDGS text search for ``query`` and return the result URLs.

    Args:
        query: Search string passed to ``DDGS.text``.
        max_results: Upper bound on results requested from the search.

    Returns:
        A list with the ``'href'`` value of every result, in the order the
        search yielded them.
    """
    urls = []
    with DDGS() as search_client:
        for hit in search_client.text(query, max_results=max_results):
            urls.append(hit['href'])
    return urls
def fetch_texts(links):
    """Extract the text of every page in ``links`` and join it into one string.

    Each link is handed to ``extract_text_from_url`` in a worker process of a
    ``multiprocessing.Pool``; falsy extraction results (e.g. empty strings or
    ``None``) are dropped before joining.

    Args:
        links: Sequence of URLs to extract text from.

    Returns:
        The non-empty page texts joined with newlines, or ``''`` when there
        are no links or nothing could be extracted.
    """
    # Nothing to crawl: avoid paying for a whole process pool just to map
    # over an empty sequence.
    if not links:
        return ''

    with multiprocessing.Pool() as pool:
        texts = pool.map(extract_text_from_url, links)
    return '\n'.join(t for t in texts if t)
def index_and_search(query, text):
    """Index the sentences of ``text`` and retrieve those closest to ``query``.

    A fresh ``VectorDatabase`` is built on every call: each sentence produced
    by ``structurize_text`` is embedded and stored under a 1-based id, then
    the database is queried with the embedding of ``query`` for ``k = 10``
    matches.

    Args:
        query: The user question to embed and search with.
        text: Raw text to split into sentences and index.

    Returns:
        A tuple ``(context, embedding_time, retrieval_time)`` where
        ``context`` is the matched sentences joined with newlines and the two
        times are wall-clock seconds. ``embedding_time`` covers the query
        embedding as well as sentence embedding and storage.
    """
    # --- Indexing (timed together with the query embedding) ---
    indexing_started = time.time()
    query_vector = model.extract_embeddings(query)

    database = VectorDatabase()
    sentences = [item['sentence'] for item in structurize_text(text)]
    for sentence_id, sentence in enumerate(sentences, start=1):
        database.store_embedding(
            sentence_id,
            model.extract_embeddings(sentence),
            {'sentence': sentence},
        )
    embedding_time = time.time() - indexing_started

    # --- Retrieval ---
    retrieval_started = time.time()
    hits = database.find_most_similar(query_vector, k = 10)
    retrieval_time = time.time() - retrieval_started

    # Element 2 of the search result is read as the stored metadata dicts
    # (presumably ids and distances come first -- confirm against minivectordb).
    matched_metadata = hits[2]
    context = '\n'.join(entry['sentence'] for entry in matched_metadata)
    return context, embedding_time, retrieval_time
def retrieval_pipeline(query):
    """Run web search, page crawling and vector retrieval for ``query``.

    Args:
        query: The user question.

    Returns:
        A tuple ``(context, websearch_time, webcrawl_time, embedding_time,
        retrieval_time, links)``: the retrieved context string, the wall-clock
        seconds spent in each stage, and the list of links that were crawled.
    """
    # Stage 1: web search.
    stage_started = time.time()
    links = fetch_links(query)
    websearch_time = time.time() - stage_started

    # Stage 2: download and extract the text of every hit.
    stage_started = time.time()
    crawled_text = fetch_texts(links)
    webcrawl_time = time.time() - stage_started

    # Stage 3: index the crawled text and pull the passages closest to the query.
    search_outcome = index_and_search(query, crawled_text)
    context, embedding_time, retrieval_time = search_outcome

    return (
        context,
        websearch_time,
        webcrawl_time,
        embedding_time,
        retrieval_time,
        links,
    )
def _extract_stream_text(event_data):
    """Return the text fragment carried by one SSE ``data:`` payload, or None.

    Chat-style chunks carry the text in ``choices[0]['delta']['content']``;
    completion-style chunks carry it in ``choices[0]['text']``. The chat shape
    is tried first. Payloads that are not valid JSON, or that lack a
    ``choices`` entry, are logged with ``print`` and skipped; chunks that are
    well-formed but carry no text (e.g. a delta without ``content``) are
    skipped silently.
    """
    try:
        choice = json.loads(event_data)['choices'][0]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(e)
        return None
    if not isinstance(choice, dict):
        return None

    delta = choice.get('delta')
    if isinstance(delta, dict) and isinstance(delta.get('content'), str):
        return delta['content']

    text = choice.get('text')
    return text if isinstance(text, str) else None


async def predict(message, history):
    """Answer ``message`` from web-search context, streaming the reply.

    The retrieval pipeline gathers context for the message, a prompt is built
    in Portuguese or English depending on the detected language, and the
    prompt is sent to the OpenRouter chat-completions endpoint with streaming
    enabled.

    Args:
        message: The user's question.
        history: Previous chat turns supplied by the caller; not used here.

    Yields:
        The accumulated response text after every received fragment, and
        finally the full response followed by a metadata block listing the
        visited links and the per-stage timings.
    """
    # Local import so this block brings its own dependency into scope.
    import codecs

    # NOTE(review): this call is synchronous and runs inside the coroutine, so
    # it holds the event loop for the whole search/crawl/embedding phase.
    context, websearch_time, webcrawl_time, embedding_time, retrieval_time, links = retrieval_pipeline(message)

    if detect_language(message) == Language.ptbr:
        prompt = f"Contexto:\n\n{context}\n\nBaseado no contexto, responda: {message}"
    else:
        prompt = f"Context:\n\n{context}\n\nBased on the context, answer: {message}"

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openrouter_key}",
    }
    body = {
        "stream": True,
        # Every model id must be its own list element: a missing comma makes
        # Python concatenate adjacent string literals into one bogus id.
        "models": [
            "mistralai/mistral-7b-instruct:free",
            "nousresearch/nous-capybara-7b:free",
            "huggingfaceh4/zephyr-7b-beta:free",
            "openchat/openchat-7b:free",
        ],
        "route": "fallback",
        "max_tokens": 768,
        "messages": [
            {"role": "user", "content": prompt}
        ],
    }

    full_response = ""
    # Network chunks can end in the middle of a multi-byte UTF-8 character;
    # an incremental decoder keeps the partial bytes until the rest arrives
    # instead of raising on the split.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=body) as response:
            buffer = ""  # Holds the trailing, still-incomplete SSE line.
            async for chunk in response.content.iter_any():
                buffer += decoder.decode(chunk)
                # Process every complete line currently in the buffer.
                while "\n" in buffer:
                    line, buffer = buffer.split("\n", 1)
                    if not line.startswith("data: "):
                        continue
                    # strip() drops the '\r' left behind by CRLF line endings
                    # so the '[DONE]' sentinel is still recognised.
                    event_data = line[len("data: "):].strip()
                    if event_data == '[DONE]':
                        continue
                    current_text = _extract_stream_text(event_data)
                    if current_text is None:
                        continue
                    full_response += current_text
                    yield full_response
                    await asyncio.sleep(0.01)

    visited_links = "".join(f"{link}\n" for link in links)
    final_metadata_block = (
        "Links visited:\n"
        f"{visited_links}"
        f"\nWeb search time: {websearch_time:.4f} seconds\n"
        f"\nText extraction: {webcrawl_time:.4f} seconds\n"
        f"\nEmbedding time: {embedding_time:.4f} seconds\n"
        f"\nRetrieval from VectorDB time: {retrieval_time:.4f} seconds"
    )
    yield f"{full_response}\n\n{final_metadata_block}"
# Build the Gradio chat UI around `predict` and start serving it.
gr.ChatInterface(
    fn=predict,
    title="AI Web Search",
    description="Ask any question, and I will try to answer it using web search !",
    # NOTE(review): None presumably hides the retry/undo buttons -- confirm
    # these keyword arguments exist in the installed Gradio version.
    retry_btn=None,
    undo_btn=None,
    examples=[
        'When did the first human land on the moon?',
        'Liquid vs solid vs gas ?',
        'What is the capital of France?',
        'Why does Brazil has a high tax rate?',
    ],
).launch()  # Launching the web interface.