Narendra-Modi / app.py
Abhaykoul's picture
Create app.py
54d1de1 verified
raw
history blame
4.31 kB
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
# Download models
# Both quantizations come from the same repo and land in ./models, which is
# the directory respond() later builds its model_path from.
for _gguf_name in (
    "narendra-modi-iq4_xs-imat.gguf",
    "narendra-modi-q6_k.gguf",
):
    hf_hub_download(
        repo_id="CharacterEcho/Narendra-Modi",
        filename=_gguf_name,
        local_dir="./models"
    )

# Module-level cache: the loaded Llama instance and the model file name it was
# created from. Both start empty and are assigned inside respond().
llm = llm_model = None
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat reply for ``message`` from the selected GGUF model.

    The Llama instance is kept in the module globals ``llm`` / ``llm_model``
    and is only rebuilt when no model is loaded yet or ``model`` differs from
    the one currently loaded.

    Args:
        message: The new user message to answer.
        history: Earlier turns; index 0 of each entry is used as the user
            text and index 1 as the assistant text.
        model: File name of the model, resolved as ``models/<model>``.
        system_message: Text used as the agent's system prompt.
        max_tokens: Copied to the sampling settings' ``max_tokens``.
        temperature: Copied to the sampling settings' ``temperature``.
        top_p: Copied to the sampling settings' ``top_p``.
        top_k: Copied to the sampling settings' ``top_k``.
        repeat_penalty: Copied to the sampling settings' ``repeat_penalty``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    global llm
    global llm_model

    chat_template = MessagesFormatterType.CHATML

    # Reuse the cached model unless nothing is loaded or the selection changed.
    needs_reload = llm is None or llm_model != model
    if needs_reload:
        llm = Llama(
            model_path=f"models/{model}",
            n_ctx=2048,  # Reduced context size for CPU
            n_threads=4,  # Adjust this based on your CPU cores
            n_gpu_layers=50
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    # Start from the provider defaults and overwrite the user-tunable fields.
    settings = provider.get_provider_default_settings()
    overrides = {
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "repeat_penalty": repeat_penalty,
        "stream": True,
    }
    for field_name, field_value in overrides.items():
        setattr(settings, field_name, field_value)

    # Replay earlier turns into a fresh chat history, user message first.
    past_turns = BasicChatHistory()
    for turn in history:
        exchange = (
            {'role': Roles.user, 'content': turn[0]},
            {'role': Roles.assistant, 'content': turn[1]},
        )
        for entry in exchange:
            past_turns.add_message(entry)

    token_stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=past_turns,
        returns_streaming_generator=True,
        print_output=False
    )

    # Yield the growing reply after every chunk so the caller can show it live.
    reply_so_far = ""
    for piece in token_stream:
        reply_so_far += piece
        yield reply_so_far
# Text passed to the chat interface as its description.
description = "The Narendra Modi AI model, developed by CharacterEcho, is trained to emulate the personality and speech patterns of Narendra Modi, the Prime Minister of India."

# Chat UI wired to respond(). The additional inputs are listed in the same
# order as respond()'s parameters after `message` and `history`:
# model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            [
                # Each option must be exactly a file name fetched by
                # hf_hub_download, because respond() opens "models/<option>".
                'narendra-modi-iq4_xs-imat.gguf',
                'narendra-modi-q6_k.gguf',
            ],
            value="narendra-modi-iq4_xs-imat.gguf",
            label="Model"
        ),
        gr.Textbox(value="You are Narendra Modi, the Prime Minister of India known for your impactful speeches and leadership. Step into the shoes of Narendra Modi and embody his unique personality. Imagine you are addressing the nation on an important issue. Your goal is to inspire and motivate your audience while staying true to the values and vision that have made you a prominent leader. Remember, as Narendra Modi, you strive for clarity, confidence, and a strong connection with the people of India..", label="System message"),
        # Upper bound equals the n_ctx=2048 that respond() passes to Llama.
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
    ],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    title="Chat with CharacterEcho/Narendra-Modi using llama.cpp",
    description=description,
    chatbot=gr.Chatbot(
        scale=1,
        likeable=False,
        show_copy_button=True
    )
)

# Launch the UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()