Bahodir Nematjonov committed
Commit a4ac1ab · 1 Parent(s): efb3b66

debuging docker

Files changed (2):
  1. Dockerfile +4 -20
  2. utils.py +23 -12
Dockerfile CHANGED
@@ -8,27 +8,11 @@ WORKDIR /code
 COPY ./requirements.txt /code/requirements.txt
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
-# Install Ollama (needed for LLM response generation)
+# Install Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh
 
-# Create a new user named "user" with user ID 1000 (non-root user for security)
-RUN useradd -m -u 1000 user
-
-# Switch to the "user" user
-USER user
-
-# Set environment variables
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH
-
-# Set the working directory to the user's home directory
-WORKDIR $HOME/app
-
-# Copy project files and set ownership to the user
-COPY --chown=user . $HOME/app
-
-# Expose the port FastAPI will run on
+# Expose FastAPI's port
 EXPOSE 7860
 
-# Start FastAPI server with Uvicorn
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+# Start Ollama in the background and then run FastAPI
+CMD ollama serve & uvicorn main:app --host 0.0.0.0 --port 7860
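
Note that "ollama serve &" and Uvicorn start concurrently, so the API can begin handling requests before the Ollama server is listening; the retry loop added to utils.py below works around that race. An alternative is to block once at startup until Ollama answers. A minimal Python sketch, assuming a hypothetical wait_for_ollama() helper that is not part of this commit:

import time
import ollama

def wait_for_ollama(timeout: float = 60.0, interval: float = 2.0) -> None:
    """Hypothetical helper (not in this commit): block until the local
    Ollama server answers, or raise once `timeout` seconds have passed."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            ollama.list()  # Any cheap call that requires the server to be up
            return
        except Exception:
            if time.monotonic() >= deadline:
                raise RuntimeError("Ollama server did not become ready in time")
            time.sleep(interval)

Calling a helper like this from the app's startup keeps the wait-for-Ollama policy in one place instead of inside every generator.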
utils.py CHANGED
@@ -1,7 +1,7 @@
 import asyncio
 import ollama
 from typing import List
-
+import time
 def cosine_similarity(embedding_0, embedding_1):
     pass
 
@@ -9,15 +9,26 @@ def generate_embedding(model, text: str, model_type: str) -> List[float]:
     pass
 
 async def generate_stream(query: str):
-    """Stream responses from Ollama in real-time."""
-    stream = ollama.chat(
-        model="llama3.2",  # Choose your model (mistral, llama2, gemma)
-        messages=[{"role": "user", "content": query}],
-        stream=True  # Enable streaming
-    )
-
-    for chunk in stream:
-        if "message" in chunk and "content" in chunk["message"]:
-            yield chunk["message"]["content"]
-        await asyncio.sleep(0)  # Allow async execution
+    """Stream responses from Ollama with automatic retries."""
+    max_retries = 5  # Retry 5 times
+    delay = 3  # Wait 3 seconds before retrying
+
+    for attempt in range(max_retries):
+        try:
+            stream = ollama.chat(
+                model="mistral",  # Use your preferred model
+                messages=[{"role": "user", "content": query}],
+                stream=True
+            )
+            for chunk in stream:
+                if "message" in chunk and "content" in chunk["message"]:
+                    yield chunk["message"]["content"]
+                await asyncio.sleep(0)
+            return
+        except Exception as e:
+            print(f"❌ Ollama connection failed (Attempt {attempt+1}/{max_retries}): {str(e)}")
+            if attempt < max_retries - 1:
+                time.sleep(delay)  # Wait before retrying
+            else:
+                yield "⚠️ Error: Could not connect to Ollama after multiple attempts."
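
Since generate_stream is an async generator, the FastAPI app referenced by the Dockerfile's CMD (main:app) can stream its output directly with StreamingResponse. A minimal sketch, assuming a hypothetical /generate endpoint and request schema; main.py itself is not part of this commit:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from utils import generate_stream

app = FastAPI()

class Query(BaseModel):
    query: str  # Hypothetical request body; the real schema is not shown here

@app.post("/generate")
async def generate(body: Query):
    # StreamingResponse consumes the async generator chunk by chunk,
    # so tokens reach the client as Ollama produces them.
    return StreamingResponse(generate_stream(body.query), media_type="text/plain")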