import os import gradio as gr from huggingface_hub import login from transformers import pipeline import torch from transformers import AutoTokenizer, AutoModelForCausalLM # Load the gated model #model_name = "RickyDeSkywalker/TheoremLlama" #model_name = "unsloth/Llama-3.2-1B-Instruct" device = "cuda" if torch.cuda.is_available() else "cpu" model_name = "internlm/internlm2-math-plus-7b" HF_TOKEN = os.environ.get("HF_TOKEN") #login(HF_TOKEN) tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error. model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).eval().to(device) model = model.eval() #generator = pipeline('text-generation', model=model_name, trust_remote_code=True, token=HF_TOKEN) # Function for generating Lean 4 code @torch.inference_mode() def generate_lean4_code(prompt): #result = generator(prompt, max_length=1000, num_return_sequences=1) #return result[0]['generated_text'] response, history = model.chat(tokenizer, prompt, history=[], meta_instruction="") print(response, history) return response # Gradio Interface title = "Lean 4 Code Generation with TheoremLlama" description = "Enter a natural language prompt to generate Lean 4 code." interface = gr.Interface( fn=generate_lean4_code, inputs=gr.Textbox( label="Prompt", placeholder="Prove that the sum of two even numbers is even.", lines=4 ), #outputs=gr.Code(label="Generated Lean 4 Code", language="lean"), outputs=gr.Code(label="Generated Lean 4 Code"), title=title, description=description ) # Launch the Gradio app interface.launch(ssr_mode=False)