Commit: testing
Files changed:
- app.py +79 -33
- embeddings.npy +3 -0

app.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 from openai import OpenAI
-from
+from langchain_huggingface import HuggingFaceEmbeddings
 from datasets import load_dataset, Dataset
 from sklearn.neighbors import NearestNeighbors
 import numpy as np
@@ -34,23 +34,22 @@ if model_provider.lower() == "openai":
     MODEL_NAME = os.environ['OPENAI_MODEL']
     client = OpenAI(
         base_url=os.environ.get("OPENAI_BASE"),
-        api_key=
+        api_key=os.environ.get("OPENAI_API_KEY")
     )
 else:
-    MODEL_NAME = "
+    MODEL_NAME = "deepseek-ai/deepseek-coder-33b-instruct"
     # Initialize Hugging Face InferenceClient with GPU support
     hf_client = InferenceClient(
         model=MODEL_NAME,
         api_key=os.environ.get("HF_TOKEN"),
-        timeout=
+        timeout=30  # Reduced timeout for faster response
     )
 
 # Load the Hugging Face dataset
 dataset = load_dataset('tosin2013/autogen', streaming=True)
 dataset = Dataset.from_list(list(dataset['train']))
 
-#
-# Use CPU for embeddings since GPU is handled by spaces.GPU decorator
+# Initialize embeddings
 embeddings = HuggingFaceEmbeddings(
     model_name="sentence-transformers/all-MiniLM-L6-v2",
     model_kwargs={"device": "cpu"}
@@ -59,41 +58,57 @@ embeddings = HuggingFaceEmbeddings(
 # Extract texts from the dataset
 texts = dataset['input']
 
-# Create embeddings for the texts
-
-
-
-
-
+# Create and cache embeddings for the texts
+if not os.path.exists('embeddings.npy'):
+    print("[LOG] Generating embeddings...")
+    text_embeddings = embeddings.embed_documents(texts)
+    np.save('embeddings.npy', text_embeddings)
+else:
+    print("[LOG] Loading cached embeddings...")
+    text_embeddings = np.load('embeddings.npy')
+
+# Fit and cache nearest neighbor model
+if not os.path.exists('nn_model.pkl'):
+    print("[LOG] Fitting nearest neighbors model...")
+    nn = NearestNeighbors(n_neighbors=5, metric='cosine')
+    nn.fit(np.array(text_embeddings))
+    import pickle
+    with open('nn_model.pkl', 'wb') as f:
+        pickle.dump(nn, f)
+else:
+    print("[LOG] Loading cached nearest neighbors model...")
+    import pickle
+    with open('nn_model.pkl', 'rb') as f:
+        nn = pickle.load(f)
 
 def get_relevant_documents(query, k=5):
     """
     Retrieves the k most relevant documents to the query.
     """
-
-
-
-
-
-
-
-
-
-
-    embeddings.model_kwargs["device"] = "cpu"
-    query_embedding = embeddings.embed_query(query)
-    distances, indices = nn.kneighbors([query_embedding], n_neighbors=k)
-    relevant_docs = [texts[i] for i in indices[0]]
-    return relevant_docs
+    import time
+    start_time = time.time()
+
+    query_embedding = embeddings.embed_query(query)
+    distances, indices = nn.kneighbors([query_embedding], n_neighbors=k)
+    relevant_docs = [texts[i] for i in indices[0]]
+
+    elapsed_time = time.time() - start_time
+    print(f"[PERF] get_relevant_documents took {elapsed_time:.2f} seconds")
+    return relevant_docs
 
 def generate_response(question, history):
+    import time
+    start_time = time.time()
+
     try:
-
-        with spaces.GPU(duration=60):
-            return _generate_response_gpu(question, history)
+        response = _generate_response_gpu(question, history)
     except Exception as e:
-        print(f"[WARNING] GPU failed
-
+        print(f"[WARNING] GPU failed: {str(e)}")
+        response = _generate_response_cpu(question, history)
+
+    elapsed_time = time.time() - start_time
+    print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
+    return response
 
 def _generate_response_gpu(question, history):
     print(f"\n[LOG] Received question: {question}")
@@ -273,6 +288,7 @@ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize fe
         history.append((question, error_msg))
         return history
 
+
 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown(f"""
@@ -318,5 +334,35 @@ with gr.Blocks() as demo:
         outputs=[chatbot, question]
     )
 
+import socket
+
+def find_available_port(start_port=7860, end_port=7900):
+    for port in range(start_port, end_port + 1):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            try:
+                s.bind(('', port))
+                return port
+            except OSError:
+                continue
+    raise OSError(f"No available ports between {start_port} and {end_port}")
+
 if __name__ == "__main__":
-
+    try:
+        port = find_available_port()
+        print(f"[LOG] Launching application on port {port}")
+        demo.launch(
+            server_port=port,
+            share=True,
+            server_name="0.0.0.0",
+            prevent_thread_lock=True
+        )
+        # Verify server is actually running
+        import time
+        time.sleep(2)  # Give server time to start
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            if s.connect_ex(('localhost', port)) == 0:
+                print(f"[SUCCESS] Server is running on port {port}")
+            else:
+                print(f"[ERROR] Failed to bind to port {port}")
+    except Exception as e:
+        print(f"[ERROR] Failed to start application: {str(e)}")
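
A note on the retrieval change above: the commit replaces per-request embedding work with a cache-or-compute pattern, persisting the document embeddings as embeddings.npy and the fitted scikit-learn index as nn_model.pkl. A minimal, self-contained sketch of that pattern follows; the toy corpus and the fake embed() stand-in are illustrative assumptions (the Space itself embeds dataset['input'] with sentence-transformers/all-MiniLM-L6-v2):

import os
import pickle
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Toy corpus; the Space uses the tosin2013/autogen dataset instead.
texts = ["create a two-agent chat", "stream tool output", "add a code executor"]

def embed(batch):
    # Fake 384-dim vectors (MiniLM's output width); a real embedder goes here.
    rng = np.random.default_rng(0)
    return rng.random((len(batch), 384)).astype(np.float32)

# Cache-or-compute: embeddings on disk as .npy, fitted index as a pickle.
if os.path.exists("embeddings.npy"):
    vecs = np.load("embeddings.npy")
else:
    vecs = embed(texts)
    np.save("embeddings.npy", vecs)

if os.path.exists("nn_model.pkl"):
    with open("nn_model.pkl", "rb") as f:
        nn = pickle.load(f)
else:
    nn = NearestNeighbors(n_neighbors=3, metric="cosine").fit(vecs)
    with open("nn_model.pkl", "wb") as f:
        pickle.dump(nn, f)

# Query path, mirroring get_relevant_documents() above.
_, idx = nn.kneighbors(embed(["build two agents"]), n_neighbors=2)
print([texts[i] for i in idx[0]])

Deleting the two cache files forces a rebuild on the next start; shipping a prebuilt embeddings.npy (added below) lets the Space skip the expensive first-run embedding pass.
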
embeddings.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9538fa345c8d0006fb2cb25372e1cbcd7d761ea7c02307196878823c3d09942b
+size 1483904
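
This is a Git LFS pointer file; the 1,483,904-byte payload lives in LFS storage, not in the repository itself. The size is consistent with the cached MiniLM embeddings: all-MiniLM-L6-v2 produces 384-dimensional vectors, and the .npy header for a plain 2-D float32 array is typically 128 bytes, which puts the inferred row count at (1,483,904 - 128) / (384 * 4) = 966 documents. A quick check once the LFS object is pulled (the shape is an inference from that arithmetic, not stated in the commit):

import numpy as np
arr = np.load("embeddings.npy")
print(arr.shape, arr.dtype)  # expecting roughly (966, 384) float32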