tosin2013 committed
Commit f60608b · verified · 1 Parent(s): babca8c

Update app.py

Files changed (1)
  1. app.py +24 -30
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import openai
 from openai import OpenAI
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from datasets import load_dataset, Dataset
@@ -11,6 +10,7 @@ from typing import List
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 import gradio as gr
+from huggingface_hub import InferenceClient
 
 # Configuration
 
@@ -25,7 +25,6 @@ DEFAULT_QUESTION = "Ask me anything in the context of persona-driven prompt gene
 os.environ['OPENAI_BASE'] = "https://api.openai.com/v1"
 os.environ['OPENAI_MODEL'] = "gpt-4"
 os.environ['MODEL_PROVIDER'] = "huggingface"
-api_key = os.environ.get("OPENAI_API_KEY")
 model_provider = os.environ.get("MODEL_PROVIDER")
 
 # Instantiate the client for openai v1.x
@@ -37,23 +36,10 @@ if model_provider.lower() == "openai":
     )
 else:
     MODEL_NAME = "meta-llama/Llama-3.3-70B-Instruct"
-    # Initialize Hugging Face client with streaming support
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ.get("HF_TOKEN"))
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        device_map='auto',
-        token=os.environ.get("HF_TOKEN"),
-        torch_dtype=torch.bfloat16,
-    )
-    streamer = TextStreamer(tokenizer, skip_prompt=True)
-    question_answerer = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        device_map='auto',
-        streamer=streamer,
-        max_new_tokens=512,
-        return_full_text=False
+    # Initialize Hugging Face InferenceClient
+    hf_client = InferenceClient(
+        model=MODEL_NAME,
+        api_key=os.environ.get("HF_TOKEN")
     )
 
 # Load the Hugging Face dataset
@@ -96,18 +82,26 @@ def generate_response(question, history):
     print(f"[LOG] Generated prompt: {prompt[:200]}...")  # Log first 200 chars of prompt
 
     if model_provider.lower() == "huggingface":
-        prompt_template = """
-        <s>[INST] <<SYS>>
-        You are a helpful AI assistant. Answer the question based on the provided context.
-        <</SYS>>
-
-        {prompt}[/INST]
-        """
-        chat_prompt = ChatPromptTemplate.from_template(prompt_template)
-        result = question_answerer(chat_prompt.format(prompt=prompt))
-        response = result[0]['generated_text'] if isinstance(result, list) else result
-        print(f"[LOG] Using Hugging Face model: {MODEL_NAME}")
-        print(f"[LOG] Hugging Face response: {response[:200]}...")  # Log first 200 chars of response
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful AI assistant. Answer the question based on the provided context."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+
+        completion = hf_client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            max_tokens=500
+        )
+        response = completion.choices[0].message.content
+        print(f"[LOG] Using Hugging Face model (serverless): {MODEL_NAME}")
+        print(f"[LOG] Hugging Face response: {response[:200]}...")
+
     elif model_provider.lower() == "openai":
         response = client.chat.completions.create(
             model=os.environ.get("OPENAI_MODEL"),
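
For context, the serverless path this commit adopts boils down to the minimal sketch below. It assumes a recent huggingface_hub release (one that exposes the OpenAI-style chat.completions interface) and a valid HF_TOKEN in the environment; the question string is illustrative only, not part of app.py.

import os
from huggingface_hub import InferenceClient

MODEL_NAME = "meta-llama/Llama-3.3-70B-Instruct"

# Serverless client: no local weights, generation runs on the Inference API.
hf_client = InferenceClient(model=MODEL_NAME, api_key=os.environ.get("HF_TOKEN"))

completion = hf_client.chat.completions.create(
    model=MODEL_NAME,
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is persona-driven prompt generation?"},  # illustrative question
    ],
    max_tokens=500,  # same cap as the committed code
)
print(completion.choices[0].message.content)

Relative to the removed AutoModelForCausalLM/pipeline path, the Space no longer has to download and load 70B-parameter weights onto a local GPU; generation happens in a single remote chat-completion call.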