Spaces:
Sleeping
Sleeping
File size: 4,622 Bytes
5fdb69e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d25b0aef-3e5e-4026-90ee-2b373bf262b7",
"metadata": {},
"outputs": [],
"source": [
"# Step 0: Import Libraries\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"import ollama\n",
"from openai import OpenAI\n",
"import requests\n",
"\n",
"# Step 1: Set Constants and Variables\n",
"print(\"[INFO] Setting constants and variable ...\")\n",
"WEBSITE_URL = \"https://arxiv.org/\"\n",
"MODEL = \"llama3.2\"\n",
"approaches = [\"local-call\", \"python-package\", \"openai-python-library\"]\n",
"approach = approaches[2]\n",
"\n",
"# Step 1: Scrape Website\n",
"print(\"[INFO] Scraping website ...\")\n",
"url_response = requests.get(\n",
" url=WEBSITE_URL,\n",
" headers={\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"}\n",
" )\n",
"soup = BeautifulSoup(\n",
" markup=url_response.content,\n",
" features=\"html.parser\"\n",
" )\n",
"website_title = soup.title.string if soup.title else \"No title found!!!\"\n",
"for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
"website_text = soup.body.get_text(\n",
" separator=\"\\n\",\n",
" strip=True\n",
" )\n",
"\n",
"# Step 2: Create Prompts\n",
"print(\"[INFO] Creating system prompt ...\")\n",
"system_prompt = \"You are an assistant that analyzes the contents of a \\\n",
" website and provides a short summary, ignoring text that might be \\\n",
" navigation related. Respond in markdown.\"\n",
"\n",
"print(\"[INFO] Creating user prompt ...\")\n",
"user_prompt = f\"You are looking at a website titled {website_title}\"\n",
"user_prompt += \"\\nBased on the contents of the website, please provide \\\n",
" a short summary of this website in markdown. If the website \\\n",
" includes news or announcements, summarize them, too. The contents \\\n",
" of this website are as follows:\\n\\n\"\n",
"user_prompt += website_text\n",
"\n",
"# Step 3: Make Messages List\n",
"print(\"[INFO] Making messages list ...\")\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
"]\n",
"\n",
"# Step 4: Call Model and Print Results\n",
"if approach == \"local-call\":\n",
" response = requests.post(\n",
" url=\"http://localhost:11434/api/chat\",\n",
" json={\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" },\n",
" headers={\"Content-Type\": \"application/json\"}\n",
" )\n",
" print(\"[INFO] Printing result ...\")\n",
" display(Markdown(response.json()[\"message\"][\"content\"]))\n",
"elif approach == \"python-package\":\n",
" response = ollama.chat(\n",
" model=MODEL,\n",
" messages=messages,\n",
" stream=False\n",
" )\n",
" print(\"[INFO] Printing result ...\")\n",
" display(Markdown(response[\"message\"][\"content\"]))\n",
"elif approach == \"openai-python-library\":\n",
" ollama_via_openai = OpenAI(\n",
" base_url=\"http://localhost:11434/v1\",\n",
" api_key=\"ollama\"\n",
" )\n",
" response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
" )\n",
" print(\"[INFO] Printing result ...\")\n",
" display(Markdown(response.choices[0].message.content))\n",
"else:\n",
" raise ValueError(f\"[INFO] Invalid approach! Please select an approach from {approaches} and try again.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0a6676e-fb43-4725-9389-2acd74c13c4e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|