{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOpe6URbmt5CbVG1CVVUIr6"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e-NVlZMV8Oh7","executionInfo":{"status":"ok","timestamp":1723254070685,"user_tz":-345,"elapsed":31968,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"f96aa546-1cf5-4890-ebdd-12197488c657"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","source":["%cd /content/drive/MyDrive/GEM_Project/"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3aTqbSUT8UZs","executionInfo":{"status":"ok","timestamp":1723254982318,"user_tz":-345,"elapsed":484,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"0f7b31c2-329c-43f5-8c5d-06f7cf54663d"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/GEM_Project\n"]}]},{"cell_type":"code","source":["%%writefile /GEM_Project/Testings/testing.py\n","import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM\n","\n","# Model and tokenizer paths\n","model_path = \"/GEM_Project/GEM_1o_Aug.pt\"\n","tokenizer_path = \"/GEM_Project/tokenizer/tokenizer\"\n","\n","# Load the tokenizer\n","tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n","\n","# Load the model\n","model = AutoModelForCausalLM.from_pretrained(model_path)\n","\n","# Set the model to evaluation mode\n","model.eval()\n","\n","# Set device to GPU if available\n","device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n","model.to(device)\n","\n","# Define a function to generate text based on a prompt\n","def generate_text(prompt, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7):\n","    # Tokenize the input prompt\n","    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)\n","\n","    # Generate output from the model\n","    output = model.generate(\n","        input_ids,\n","        max_length=max_length,\n","        num_return_sequences=num_return_sequences,\n","        no_repeat_ngram_size=no_repeat_ngram_size,\n","        do_sample=True,\n","        top_k=top_k,\n","        top_p=top_p,\n","        temperature=temperature\n","    )\n","\n","    # Decode the generated output\n","    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)\n","    return generated_text\n","\n","# Test prompts\n","prompts = [\n","    \"The future of artificial intelligence is\",\n","    \"Once upon a time in a land far away,\",\n","    \"In the field of natural language processing,\",\n","    \"The concept of creativity in machines is\"\n","]\n","\n","# Generate and print outputs for each prompt\n","for prompt in prompts:\n","    print(f\"Prompt: {prompt}\")\n","    generated_text = generate_text(prompt)\n","    print(f\"Generated: {generated_text}\\n\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":530},"collapsed":true,"id":"h46NWxXr8oee","executionInfo":{"status":"error","timestamp":1723255000696,"user_tz":-345,"elapsed":503,"user":{"displayName":"Basab 
Jha","userId":"01698150105745770629"}},"outputId":"22a59c5f-b5a2-4eef-b30e-3d3be9c3a780"},"execution_count":17,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing /GEM_Project/Testings/testing.py\n"]},{"output_type":"error","ename":"FileNotFoundError","evalue":"[Errno 2] No such file or directory: '/GEM_Project/Testings/testing.py'","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)","\u001b[0;32m<ipython-input-17-0335cf91e209>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'writefile'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'/GEM_Project/Testings/testing.py'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'import torch\\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\\n\\n# Model and tokenizer paths\\nmodel_path = \"/GEM_Project/GEM_1o_Aug.pt\"\\ntokenizer_path = \"/GEM_Project/tokenizer/tokenizer\"\\n\\n# Load the tokenizer\\ntokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\\n\\n# Load the model\\nmodel = AutoModelForCausalLM.from_pretrained(model_path)\\n\\n# Set the model to evaluation mode\\nmodel.eval()\\n\\n# Set device to GPU if available\\ndevice = torch.device(\\'cuda\\' if torch.cuda.is_available() else \\'cpu\\')\\nmodel.to(device)\\n\\n# Define a function to generate text based on a prompt\\ndef generate_text(prompt, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7):\\n    # Tokenize the input prompt\\n    input_ids = tokenizer.encode(prompt, return_tensors=\\'pt\\').to(device)\\n\\n    # Generate output from the model\\n    output = model.generate(\\n        input_ids,\\n        max_length=max_length,\\n        num_return_sequences=num_return_sequences,\\n        no_repeat_ngram_size=no_repeat_ngram_size,\\n        do_sample=True,\\n        top_k=top_k,\\n        top_p=top_p,\\n        temperature=temperature\\n    )\\n\\n    # Decode the generated output\\n    generated_text = tokenizer.decode(output[0], skip_special_tokens=Tr...\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/_shell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m    332\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    333\u001b[0m       \u001b[0mcell\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m' '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 334\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmagic_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    336\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in 
\u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m   2471\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2472\u001b[0m                 \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2473\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2474\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2475\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m<decorator-gen-98>\u001b[0m in \u001b[0;36mwritefile\u001b[0;34m(self, line, cell)\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    186\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m         \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/magics/osm.py\u001b[0m in \u001b[0;36mwritefile\u001b[0;34m(self, line, cell)\u001b[0m\n\u001b[1;32m    854\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    855\u001b[0m         \u001b[0mmode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'a'\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 856\u001b[0;31m         \u001b[0;32mwith\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    
857\u001b[0m             \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/GEM_Project/Testings/testing.py'"]}]},{"cell_type":"code","source":["!python testing.py"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"X_02SGwK_BY6","executionInfo":{"status":"ok","timestamp":1723254885616,"user_tz":-345,"elapsed":7324,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"e30ed0b8-e9c5-486d-c912-1a58701c6ae4"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["Traceback (most recent call last):\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py\", line 402, in cached_file\n","    resolved_file = hf_hub_download(\n","  File \"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py\", line 106, in _inner_fn\n","    validate_repo_id(arg_value)\n","  File \"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py\", line 154, in validate_repo_id\n","    raise HFValidationError(\n","huggingface_hub.errors.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': './tokenizer/gem_tokenizer'. Use `repo_type` argument if needed.\n","\n","The above exception was the direct cause of the following exception:\n","\n","Traceback (most recent call last):\n","  File \"/content/drive/MyDrive/GEM_Project/Testings/testing.py\", line 9, in <module>\n","    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py\", line 826, in from_pretrained\n","    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py\", line 658, in get_tokenizer_config\n","    resolved_config_file = cached_file(\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py\", line 466, in cached_file\n","    raise EnvironmentError(\n","OSError: Incorrect path_or_model_id: './tokenizer/gem_tokenizer'. Please provide either the path to a local folder or the repo_id of a model on the Hub.\n"]}]}]}
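Both failures above come down to paths. The %%writefile target /GEM_Project/Testings/testing.py is an absolute path at the filesystem root, but the project lives under the mounted Drive at /content/drive/MyDrive/GEM_Project, so the write fails. The !python testing.py run then executes an older copy of testing.py already on Drive, whose tokenizer path './tokenizer/gem_tokenizer' is neither a valid local folder nor a Hub repo id. Below is a minimal sketch of a corrected loading step, not the project's actual setup: it assumes the tokenizer and model were saved with save_pretrained() into directories under the project folder, and the directory names used here are placeholders. Loading the raw GEM_1o_Aug.pt checkpoint directly through from_pretrained() would not work.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# All paths live under the mounted Drive project folder, not the filesystem root.
PROJECT_DIR = "/content/drive/MyDrive/GEM_Project"

# Assumed layout (placeholders): directories produced by save_pretrained(),
# i.e. containing the tokenizer files and config.json plus weights respectively.
tokenizer_path = f"{PROJECT_DIR}/tokenizer/tokenizer"
model_dir = f"{PROJECT_DIR}/model"

tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
model = AutoModelForCausalLM.from_pretrained(model_dir)

# If only the raw checkpoint GEM_1o_Aug.pt (a state dict) is available,
# from_pretrained() cannot consume it directly; the weights would have to be
# loaded into an already-instantiated architecture instead, e.g.:
#   state_dict = torch.load(f"{PROJECT_DIR}/GEM_1o_Aug.pt", map_location="cpu")
#   model.load_state_dict(state_dict)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

Likewise, writing the test script to the Drive path (%%writefile /content/drive/MyDrive/GEM_Project/Testings/testing.py) instead of /GEM_Project/... would avoid the FileNotFoundError, provided the Testings directory already exists there.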