Spaces:

CharacterEcho
/

Narendra-Modi

Sleeping

App Files Files Community

Narendra-Modi / app.py

Abhaykoul

Create app.py

54d1de1 verified 10 months ago

raw

history blame

4.31 kB

	import json
	import subprocess
	from llama_cpp import Llama
	from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
	from llama_cpp_agent.providers import LlamaCppPythonProvider
	from llama_cpp_agent.chat_history import BasicChatHistory
	from llama_cpp_agent.chat_history.messages import Roles
	import gradio as gr
	from huggingface_hub import hf_hub_download

	# Fetch both quantized GGUF files from the Hub into ./models at import time.
	for _gguf_name in (
	    "narendra-modi-iq4_xs-imat.gguf",
	    "narendra-modi-q6_k.gguf",
	):
	    hf_hub_download(
	        repo_id="CharacterEcho/Narendra-Modi",
	        filename=_gguf_name,
	        local_dir="./models",
	    )

	# Module-level cache: the loaded Llama instance and the model filename it was
	# created from. respond() rebinds both through `global` when the selection changes.
	llm = None
	llm_model = None

	def respond(
	    message,
	    history: list[tuple[str, str]],
	    model,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    top_k,
	    repeat_penalty,
	):
	    """Stream a chat reply for ``message`` from the selected GGUF model.

	    Args:
	        message: The new user message to answer.
	        history: Earlier turns; each entry is indexed as ``[0]`` for the user
	            text and ``[1]`` for the assistant text.
	        model: Model filename, resolved as ``models/<model>``.
	        system_message: Text used as the agent's system prompt.
	        max_tokens: Copied onto the provider settings as ``max_tokens``.
	        temperature: Copied onto the provider settings as ``temperature``.
	        top_p: Copied onto the provider settings as ``top_p``.
	        top_k: Copied onto the provider settings as ``top_k``.
	        repeat_penalty: Copied onto the provider settings as ``repeat_penalty``.

	    Yields:
	        The reply accumulated so far, once per chunk received from the stream.
	    """
	    global llm, llm_model

	    formatter = MessagesFormatterType.CHATML

	    # Build a new Llama only on first use or when a different model file is
	    # requested; otherwise the cached module-level instance is reused.
	    if llm is None or llm_model != model:
	        llm = Llama(
	            model_path=f"models/{model}",
	            n_ctx=2048,  # Reduced context size for CPU
	            n_threads=4,  # Adjust this based on your CPU cores
	            # NOTE(review): GPU offload is requested although the neighbouring
	            # comments target CPU - confirm this is intended.
	            n_gpu_layers=50,
	        )
	        llm_model = model

	    provider = LlamaCppPythonProvider(llm)
	    agent = LlamaCppAgent(
	        provider,
	        system_prompt=f"{system_message}",
	        predefined_messages_formatter_type=formatter,
	        debug_output=True,
	    )

	    # Start from the provider defaults and overlay the values supplied by the caller.
	    sampling = provider.get_provider_default_settings()
	    overrides = {
	        "temperature": temperature,
	        "top_k": top_k,
	        "top_p": top_p,
	        "max_tokens": max_tokens,
	        "repeat_penalty": repeat_penalty,
	        "stream": True,
	    }
	    for field_name, field_value in overrides.items():
	        setattr(sampling, field_name, field_value)

	    # Replay prior turns as alternating user / assistant messages.
	    past_turns = BasicChatHistory()
	    for turn in history:
	        for role, text in ((Roles.user, turn[0]), (Roles.assistant, turn[1])):
	            past_turns.add_message({'role': role, 'content': text})

	    chunks = agent.get_chat_response(
	        message,
	        llm_sampling_settings=sampling,
	        chat_history=past_turns,
	        returns_streaming_generator=True,
	        print_output=False,
	    )

	    # Emit the growing reply after every chunk so the caller can show partial text.
	    reply_so_far = ""
	    for chunk in chunks:
	        reply_so_far += chunk
	        yield reply_so_far

	# Blurb passed to the chat interface as its description.
	description = "The Narendra Modi AI model, developed by CharacterEcho, is trained to emulate the personality and speech patterns of Narendra Modi, the Prime Minister of India."

	# Chat UI wired to respond(). The additional inputs are listed in the same
	# order as respond()'s parameters after (message, history): model,
	# system_message, max_tokens, temperature, top_p, top_k, repeat_penalty.
	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=[
	        gr.Dropdown(
	            [
	                'narendra-modi-iq4_xs-imat.gguf',
	                # Must match the filename fetched by hf_hub_download; the
	                # previous value ended in ".gguff", so respond() was asked to
	                # load a file that does not exist under models/.
	                'narendra-modi-q6_k.gguf',
	            ],
	            value="narendra-modi-iq4_xs-imat.gguf",
	            label="Model",
	        ),
	        gr.Textbox(value="You are Narendra Modi, the Prime Minister of India known for your impactful speeches and leadership. Step into the shoes of Narendra Modi and embody his unique personality. Imagine you are addressing the nation on an important issue. Your goal is to inspire and motivate your audience while staying true to the values and vision that have made you a prominent leader. Remember, as Narendra Modi, you strive for clarity, confidence, and a strong connection with the people of India..", label="System message"),
	        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
	        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
	        gr.Slider(
	            minimum=0.1,
	            maximum=1.0,
	            value=0.95,
	            step=0.05,
	            label="Top-p",
	        ),
	        gr.Slider(
	            minimum=0,
	            maximum=100,
	            value=40,
	            step=1,
	            label="Top-k",
	        ),
	        gr.Slider(
	            minimum=0.0,
	            maximum=2.0,
	            value=1.1,
	            step=0.1,
	            label="Repetition penalty",
	        ),
	    ],
	    retry_btn="Retry",
	    undo_btn="Undo",
	    clear_btn="Clear",
	    submit_btn="Send",
	    title="Chat with CharacterEcho/Narendra-Modi using llama.cpp",
	    description=description,
	    chatbot=gr.Chatbot(
	        scale=1,
	        likeable=False,
	        show_copy_button=True,
	    ),
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()