import time, os, multiprocessing, torch, requests, asyncio, json, aiohttp
from minivectordb.embedding_model import EmbeddingModel
from minivectordb.vector_database import VectorDatabase
from text_util_en_pt.cleaner import structurize_text, detect_language, Language
from webtextcrawler.webtextcrawler import extract_text_from_url
import gradio as gr
from googlesearch import search
torch.set_num_threads(2)  # limit CPU threads used by the embedding model
openrouter_key = os.environ.get("OPENROUTER_KEY")  # API key read from the environment
model = EmbeddingModel(use_quantized_onnx_model=True)
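
# Pipeline overview (as implemented below): the user message is turned into a
# search query via OpenRouter, the query is run through Google search, the
# resulting pages are crawled for text, the text is split into sentences and
# indexed in an in-memory vector database, and the top matches are placed into
# the final prompt, whose answer is streamed back to the Gradio chat.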
def fetch_links(query, max_results=10):
    # Run a Google search and return up to max_results result URLs
    return list(search(query, num_results=max_results))

def fetch_texts(links):
    # Crawl the pages in parallel and keep only the ones that returned text
    with multiprocessing.Pool(10) as pool:
        texts = pool.map(extract_text_from_url, links)
    return '\n'.join([t for t in texts if t])
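
# A minimal usage sketch (hypothetical query; wrap the calls in an
# `if __name__ == "__main__":` guard on platforms that spawn rather than fork
# worker processes):
#   links = fetch_links("mistral 7b benchmarks")
#   corpus = fetch_texts(links)  # newline-joined text of every page that yielded content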
def index_and_search(query, text):
    start = time.time()
    query_embedding = model.extract_embeddings(query)

    # Indexing
    vector_db = VectorDatabase()
    sentences = [s['sentence'] for s in structurize_text(text)]
    for idx, sentence in enumerate(sentences):
        sentence_embedding = model.extract_embeddings(sentence)
        vector_db.store_embedding(idx + 1, sentence_embedding, {'sentence': sentence})
    embedding_time = time.time() - start

    # Retrieval
    start = time.time()
    search_results = vector_db.find_most_similar(query_embedding, k=30)
    retrieval_time = time.time() - start
    return '\n'.join([s['sentence'] for s in search_results[2]]), embedding_time, retrieval_time
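
# Note: as used above, find_most_similar returns parallel result lists, with
# the stored metadata dicts at index 2, which is why the sentences are read
# from search_results[2].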
def generate_search_terms(message, lang):
    if lang == Language.ptbr:
        prompt = f"A partir do texto a seguir, gere alguns termos de pesquisa: \"{message}\"\nSua resposta deve ser apenas o termo de busca mais adequado, e nada mais."
    else:
        prompt = f"From the following text, generate some search terms: \"{message}\"\nYour answer should be just the most appropriate search term, and nothing else."

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openrouter_key}"
    }
    body = {
        "stream": False,
        "models": [
            "mistralai/mistral-7b-instruct:free",
            "openchat/openchat-7b:free"
        ],
        "route": "fallback",
        "max_tokens": 1024,
        "messages": [
            {"role": "user", "content": prompt}
        ]
    }
    response = requests.post(url, headers=headers, json=body)
    return response.json()['choices'][0]['message']['content']
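
# The non-streaming response parsed above follows the OpenAI-compatible schema
# that OpenRouter exposes, roughly:
#   {"choices": [{"message": {"role": "assistant", "content": "<search term>"}}], ...}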
async def predict(message, history):
    full_response = ""
    query_language = detect_language(message)

    # Step 1: turn the user message into a web search query
    start = time.time()
    full_response += "Generating search terms...\n"
    yield full_response
    search_query = generate_search_terms(message, query_language)
    search_terms_time = time.time() - start
    full_response += f"Search terms: \"{search_query}\"\n"
    yield full_response
    full_response += f"Search terms took: {search_terms_time:.4f} seconds\n"
    yield full_response

    # Step 2: search the web and collect links
    start = time.time()
    full_response += "\nSearching the web...\n"
    yield full_response
    links = fetch_links(search_query)
    websearch_time = time.time() - start
    full_response += f"Web search took: {websearch_time:.4f} seconds\n"
    yield full_response
    full_response += "Links visited:\n"
    yield full_response
    for link in links:
        full_response += f"{link}\n"
        yield full_response

    # Step 3: crawl the pages and extract their text
    full_response += "\nExtracting text from web pages...\n"
    yield full_response
    start = time.time()
    text = fetch_texts(links)
    webcrawl_time = time.time() - start
    full_response += f"Text extraction took: {webcrawl_time:.4f} seconds\n"

    # Step 4: index the text, retrieve relevant sentences, and build the prompt
    full_response += "\nIndexing in vector database and building prompt...\n"
    yield full_response
    context, embedding_time, retrieval_time = index_and_search(message, text)
    if query_language == Language.ptbr:
        prompt = f"Contexto:\n{context}\n\nResponda: \"{message}\"\n(Você pode utilizar o contexto para responder)\n(Sua resposta deve ser completa, detalhada e bem estruturada)"
    else:
        prompt = f"Context:\n{context}\n\nAnswer: \"{message}\"\n(You can use the context to answer)\n(Your answer should be complete, detailed and well-structured)"
    full_response += f"Embedding time: {embedding_time:.4f} seconds\n"
    full_response += f"Retrieval from VectorDB time: {retrieval_time:.4f} seconds\n"
    yield full_response

    # Step 5: stream the final answer from the LLM
    full_response += "\nGenerating response...\n"
    yield full_response
    full_response += "\nResponse: "
url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {openrouter_key}"
}
body = {
"stream": True,
"models": [
"mistralai/mistral-7b-instruct:free",
"openchat/openchat-7b:free"
],
"route": "fallback",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": prompt}
]
}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=body) as response:
buffer = "" # A buffer to hold incomplete lines of data
async for chunk in response.content.iter_any():
buffer += chunk.decode()
while "\n" in buffer: # Process as long as there are complete lines in the buffer
line, buffer = buffer.split("\n", 1)
if line.startswith("data: "):
event_data = line[len("data: "):]
if event_data != '[DONE]':
try:
current_text = json.loads(event_data)['choices'][0]['delta']['content']
full_response += current_text
yield full_response
await asyncio.sleep(0.01)
except Exception:
try:
current_text = json.loads(event_data)['choices'][0]['text']
full_response += current_text
yield full_response
await asyncio.sleep(0.01)
except Exception:
pass
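
# The streamed body above is Server-Sent Events: one "data: <json>" payload per
# line, terminated by "data: [DONE]", e.g.:
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: {"choices": [{"delta": {"content": "lo"}}]}
#   data: [DONE]
# The inner fallback handles providers that emit completion-style chunks with a
# top-level "text" field instead of a chat "delta".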
# Define a custom theme: purple accents on a dark background
custom_theme = gr.themes.Default(
    primary_hue="purple",                   # primary accent (buttons, highlights)
    secondary_hue="indigo",                 # secondary accent
    font=gr.themes.GoogleFont("Roboto"),    # custom font
    text_size=gr.themes.sizes.text_md,      # ~16px base font size
    radius_size=gr.themes.sizes.radius_md,  # rounded corners
).set(
    body_background_fill="#1a1a2e",  # dark page background
    body_text_color="#f0f0f0",       # light text for better contrast
)
# Apply the custom theme to the Gradio interface
gr.ChatInterface(
    predict,
    title="Assistant pro",
    retry_btn=None,
    undo_btn=None,
    examples=[
        'What is the current sentiment of the Brazil election?',
        'Compare the current economies of China and India.',
        'What are the new shoe design trends in 2024?',
    ],
    theme=custom_theme  # Apply the custom theme
).launch()
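
# To try the app locally (assuming this file is saved as app.py, the imports
# above are installed, and OPENROUTER_KEY is set in the environment):
#   python app.py
# then open the local URL that Gradio prints.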