Drag2121 committed
Commit 0af890e · 1 Parent(s): 0ba5adf

langchain ollama not chatollama

Files changed (2)
  1. app.py +8 -12
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,11 +2,10 @@ import os
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
-from langchain_ollama import ChatOllama
-from langchain.schema import HumanMessage
+from langchain_community.llms import Ollama
+from langchain_core.messages import HumanMessage
 import logging
 from functools import lru_cache
-
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -16,7 +15,7 @@ MODEL_NAME = 'phi3:mini'
 
 @lru_cache()
 def get_llm():
-    return ChatOllama(model=MODEL_NAME)
+    return Ollama(model=MODEL_NAME)
 
 class Question(BaseModel):
     text: str
@@ -30,10 +29,9 @@ async def ask_question(question: Question):
     try:
         logger.info(f"Received question: {question.text}")
         llm = get_llm()
-        messages = [HumanMessage(content=question.text)]
-        response = llm(messages)
+        response = llm.invoke(question.text)
         logger.info("Response generated successfully")
-        return {"answer": response.content}
+        return {"answer": response}
     except Exception as e:
         logger.error(f"Error in /ask endpoint: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
@@ -43,14 +41,12 @@ async def ask_question_stream(question: Question):
     try:
         logger.info(f"Received question for streaming: {question.text}")
         llm = get_llm()
-        messages = [HumanMessage(content=question.text)]
 
         async def generate():
             full_response = ""
-            async for chunk in llm.astream(messages):
-                if chunk.content:
-                    full_response += chunk.content
-                    yield chunk.content
+            async for chunk in llm.astream(question.text):
+                full_response += chunk
+                yield chunk
 
             # Log the full response after streaming is complete
             logger.info(f"Full streamed response: {full_response}")
requirements.txt CHANGED
@@ -2,4 +2,5 @@ fastapi
 uvicorn[standard]
 langchain-ollama
 langchain
-pydantic
+pydantic
+langchain-community
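requirements.txt keeps langchain-ollama even though app.py no longer imports from it; the new code only needs langchain-community (and langchain-core, pulled in as a dependency). A quick post-install sanity check after pip install -r requirements.txt could look like the sketch below; the module list is my own choice, not part of the commit:

# Hypothetical check: confirm the imports app.py relies on resolve.
from importlib import import_module

for name in ("fastapi", "pydantic", "langchain_community.llms", "langchain_core.messages"):
    import_module(name)
    print(f"import ok: {name}")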