zeeshan391 committed on
Commit
d927270
·
verified ·
1 Parent(s): 8d87399

updated app

Browse files
Files changed (1) hide show
  1. app.py +26 -6
app.py CHANGED
@@ -1,7 +1,7 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
- from langchain_huggingface.llms import HuggingFacePipeline
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
  from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
6
  from langchain_core.prompts import ChatPromptTemplate
7
 
@@ -25,10 +25,30 @@ class StoryRequest(BaseModel):
25
  # Initialize the LLM
26
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
27
 
28
- tokenizer = AutoTokenizer.from_pretrained("tohur/natsumura-storytelling-rp-1.0-llama-3.1-8b")
29
- model = AutoModelForCausalLM.from_pretrained("tohur/natsumura-storytelling-rp-1.0-llama-3.1-8b")
30
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=2000)
31
- llm = HuggingFacePipeline(pipeline=pipe)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Create a prompt template
34
  # system = """You are a helpful and creative assistant that specializes in generating engaging and imaginative stories for kids.
 
import os

from fastapi import FastAPI, HTTPException
from huggingface_hub.file_download import http_get
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from llama_cpp import Llama
from pydantic import BaseModel
 
 
25
  # Initialize the LLM
26
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
27
 
28
def load_model(
    directory: str = ".",
    model_name: str = "natsumura-storytelling-rp-1.0-llama-3.1-8B.Q3_K_M.gguf",
    # NOTE: must be a /resolve/ URL — the original /tree/ URL points at the
    # HTML file-listing page, so http_get would save an HTML page as the model.
    model_url: str = (
        "https://huggingface.co/tohur/natsumura-storytelling-rp-1.0-llama-3.1-8b-GGUF"
        "/resolve/main/natsumura-storytelling-rp-1.0-llama-3.1-8B.Q3_K_M.gguf"
    ),
) -> Llama:
    """Download the GGUF model file (if not already present) and load it.

    Args:
        directory: Local directory where the model file is stored/cached.
        model_name: Filename of the GGUF model inside ``directory``.
        model_url: Direct-download (``/resolve/``) URL for the model file.

    Returns:
        A ``llama_cpp.Llama`` instance ready for inference.
    """
    final_model_path = os.path.join(directory, model_name)

    if not os.path.exists(final_model_path):
        print("Downloading all files...")
        # Download to a temporary ".part" file and rename atomically so an
        # interrupted download never leaves a truncated file at the final
        # path (which would be silently loaded on the next run).
        partial_path = final_model_path + ".part"
        try:
            with open(partial_path, "wb") as f:
                http_get(model_url, f)
            os.chmod(partial_path, 0o777)
            os.replace(partial_path, final_model_path)
        except BaseException:
            # Clean up the partial file on any failure, then re-raise.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        print("Files downloaded!")

    model = Llama(
        model_path=final_model_path,
        n_ctx=1024,
    )

    print("Model loaded!")
    return model


# Loaded once at import time; the download only happens on the first run.
llm = load_model()
52
 
53
  # Create a prompt template
54
  # system = """You are a helpful and creative assistant that specializes in generating engaging and imaginative stories for kids.