#!/usr/bin/env python

import os
from collections.abc import Iterator
from datetime import datetime, timedelta, timezone

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

DESCRIPTION = "# 真空ジェネレータ\nImitate 真空 (@vericava)'s posts interactively\n"

if not torch.cuda.is_available():
    DESCRIPTION += "\nRunning on CPU 🥶 This demo does not work on CPU.\n"
" MAX_MAX_NEW_TOKENS = 2048 DEFAULT_MAX_NEW_TOKENS = 1024 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "32768")) if torch.cuda.is_available(): model_id = "vericava/llm-jp-3-1.8b-instruct-lora-vericava17" base_model_id = "llm-jp/llm-jp-3-1.8b-instruct" tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True) tokenizer.chat_template = "{{bos_token}}{% for message in messages %}{% if message['role'] == 'user' %}{{ '\\n\\n### 前の投稿:\\n' + message['content'] + '' }}{% elif message['role'] == 'system' %}{{ '以下は、SNS上の投稿です。あなたはSNSの投稿生成botとして、次に続く投稿を考えなさい。説明はせず、投稿の内容のみを鉤括弧をつけずに答えよ。' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '\\n\\n### 次の投稿:\\n' + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '\\n\\n### 次の投稿:\\n' }}{% endif %}{% endfor %}" model = AutoModelForCausalLM.from_pretrained( base_model_id, trust_remote_code=True, ) model.load_adapter(model_id) my_pipeline=pipeline( task="text-generation", model=model, tokenizer=tokenizer, do_sample=True, num_beams=1, ) @spaces.GPU @torch.inference_mode() def generate( message: str, chat_history: list[tuple[str, str]], max_new_tokens: int = 1024, temperature: float = 0.7, top_p: float = 0.95, top_k: int = 50, repetition_penalty: float = 1.0, ) -> Iterator[str]: from datetime import datetime, timezone, timedelta d=datetime.now(timezone(timedelta(hours=9), 'JST')) m=d.month if m < 3 or m > 11: season = '冬' elif m < 6: season = '春' elif m < 9: season = '夏' else: season = '秋' h=d.hour go = '午前' if h < 12 else '午後' h = h % 12 minute = d.minute time = go + str(h) + '時' + str(minute) + '分' messages = [ {"role": "system", "content": "なお今は日本の" + season + "で、時刻は" + time + "であるものとする。また、あなたは真空という名前のユーザであるとする。"}, {"role": "user", "content": message}, ] output = my_pipeline( messages, temperature=temperature, max_new_tokens=max_new_tokens, ) print(output) yield output[-1]["generated_text"][-1]["content"] demo = gr.ChatInterface( fn=generate, type="tuples", additional_inputs_accordion=gr.Accordion(label="詳細設定", open=False), additional_inputs=[ gr.Slider( label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS, ), gr.Slider( label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=1.0, ), gr.Slider( label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.95, ), gr.Slider( label="Top-k", minimum=1, maximum=1000, step=1, value=50, ), gr.Slider( label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.5, ), ], stop_btn=None, examples=[ ["サマリーを作る男の人,サマリーマン。"], ["やばい場所にクリティカルな配線ができてしまったので掲示した。"], ["にゃん"], ["Wikipedia の情報は入っているのかもしれない"], ], description=DESCRIPTION, css_paths="style.css", fill_height=True, ) if __name__ == "__main__": demo.launch()