Spaces:

b3ndur
/

llama3-hackathon

Sleeping

File size: 29,511 Bytes

11353d1

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Berhasil terkoneksi ke MySQL Server\n",
      "Database 'tourism_destination' berhasil dibuat!\n",
      "Koneksi ke MySQL ditutup\n"
     ]
    }
   ],
   "source": [
    "import mysql.connector\n",
    "from mysql.connector import Error\n",
    "\n",
    "# Fungsi untuk membuat koneksi ke MySQL dan membuat database\n",
    "def create_database(host_name, user_name, user_password, db_name):\n",
    "    try:\n",
    "        # Koneksi ke server MySQL\n",
    "        connection = mysql.connector.connect(\n",
    "            host=host_name,\n",
    "            user=user_name,\n",
    "            password=user_password\n",
    "        )\n",
    "        \n",
    "        if connection.is_connected():\n",
    "            print(\"Berhasil terkoneksi ke MySQL Server\")\n",
    "            cursor = connection.cursor()\n",
    "            # Membuat database baru\n",
    "            cursor.execute(f\"CREATE DATABASE {db_name}\")\n",
    "            print(f\"Database '{db_name}' berhasil dibuat!\")\n",
    "    \n",
    "    except Error as e:\n",
    "        print(f\"Error: '{e}' terjadi\")\n",
    "    \n",
    "    finally:\n",
    "        # Menutup koneksi\n",
    "        if connection.is_connected():\n",
    "            cursor.close()\n",
    "            connection.close()\n",
    "            print(\"Koneksi ke MySQL ditutup\")\n",
    "\n",
    "# Contoh penggunaan\n",
    "host = \"localhost\"\n",
    "user = \"root\"\n",
    "password = \"admin123\"\n",
    "database_name = \"tourism_destination\"\n",
    "\n",
    "create_database(host, user, password, database_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Berhasil terkoneksi ke database 'tourism_destination'\n",
      "Tabel 'places' berhasil dibuat!\n",
      "Koneksi ke MySQL ditutup\n"
     ]
    }
   ],
   "source": [
    "def create_table(host_name, user_name, user_password, db_name):\n",
    "    try:\n",
    "        # Koneksi ke MySQL dan pilih database\n",
    "        connection = mysql.connector.connect(\n",
    "            host=host_name,\n",
    "            user=user_name,\n",
    "            password=user_password,\n",
    "            database=db_name\n",
    "        )\n",
    "        \n",
    "        if connection.is_connected():\n",
    "            print(f\"Berhasil terkoneksi ke database '{db_name}'\")\n",
    "            cursor = connection.cursor()\n",
    "            \n",
    "            # Membuat tabel dengan kolom sesuai format yang diberikan\n",
    "            create_table_query = \"\"\"\n",
    "            CREATE TABLE places (\n",
    "                Place_Id INT AUTO_INCREMENT PRIMARY KEY,\n",
    "                Place_Name VARCHAR(255) NOT NULL,\n",
    "                Description TEXT,\n",
    "                Category VARCHAR(100),\n",
    "                City VARCHAR(100),\n",
    "                Price DECIMAL(10, 2),  \n",
    "                Rating FLOAT \n",
    "            );\n",
    "            \"\"\"\n",
    "            cursor.execute(create_table_query)\n",
    "            print(\"Tabel 'places' berhasil dibuat!\")\n",
    "    \n",
    "    except Error as e:\n",
    "        print(f\"Error: '{e}' terjadi\")\n",
    "    \n",
    "    finally:\n",
    "        # Menutup koneksi\n",
    "        if connection.is_connected():\n",
    "            cursor.close()\n",
    "            connection.close()\n",
    "            print(\"Koneksi ke MySQL ditutup\")\n",
    "\n",
    "# Contoh penggunaan\n",
    "host = \"localhost\"\n",
    "user = \"root\"\n",
    "password = \"admin123\"\n",
    "database_name = \"tourism_destination\"\n",
    "\n",
    "create_table(host, user, password, database_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Place_Id</th>\n",
       "      <th>Place_Name</th>\n",
       "      <th>Description</th>\n",
       "      <th>Category</th>\n",
       "      <th>City</th>\n",
       "      <th>Price</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Time_Minutes</th>\n",
       "      <th>Coordinate</th>\n",
       "      <th>Lat</th>\n",
       "      <th>Long</th>\n",
       "      <th>Unnamed: 11</th>\n",
       "      <th>Unnamed: 12</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Monumen Nasional</td>\n",
       "      <td>Monumen Nasional atau yang populer disingkat d...</td>\n",
       "      <td>Budaya</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>20000</td>\n",
       "      <td>4.6</td>\n",
       "      <td>15.0</td>\n",
       "      <td>{'lat': -6.1753924, 'lng': 106.8271528}</td>\n",
       "      <td>-6.175392</td>\n",
       "      <td>106.827153</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Kota Tua</td>\n",
       "      <td>Kota tua di Jakarta, yang juga bernama Kota Tu...</td>\n",
       "      <td>Budaya</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>0</td>\n",
       "      <td>4.6</td>\n",
       "      <td>90.0</td>\n",
       "      <td>{'lat': -6.137644799999999, 'lng': 106.8171245}</td>\n",
       "      <td>-6.137645</td>\n",
       "      <td>106.817125</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Dunia Fantasi</td>\n",
       "      <td>Dunia Fantasi atau disebut juga Dufan adalah t...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>270000</td>\n",
       "      <td>4.6</td>\n",
       "      <td>360.0</td>\n",
       "      <td>{'lat': -6.125312399999999, 'lng': 106.8335377}</td>\n",
       "      <td>-6.125312</td>\n",
       "      <td>106.833538</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Taman Mini Indonesia Indah (TMII)</td>\n",
       "      <td>Taman Mini Indonesia Indah merupakan suatu kaw...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>10000</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>{'lat': -6.302445899999999, 'lng': 106.8951559}</td>\n",
       "      <td>-6.302446</td>\n",
       "      <td>106.895156</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Atlantis Water Adventure</td>\n",
       "      <td>Atlantis Water Adventure atau dikenal dengan A...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>94000</td>\n",
       "      <td>4.5</td>\n",
       "      <td>60.0</td>\n",
       "      <td>{'lat': -6.12419, 'lng': 106.839134}</td>\n",
       "      <td>-6.124190</td>\n",
       "      <td>106.839134</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Place_Id                         Place_Name  \\\n",
       "0         1                   Monumen Nasional   \n",
       "1         2                           Kota Tua   \n",
       "2         3                      Dunia Fantasi   \n",
       "3         4  Taman Mini Indonesia Indah (TMII)   \n",
       "4         5           Atlantis Water Adventure   \n",
       "\n",
       "                                         Description       Category     City  \\\n",
       "0  Monumen Nasional atau yang populer disingkat d...         Budaya  Jakarta   \n",
       "1  Kota tua di Jakarta, yang juga bernama Kota Tu...         Budaya  Jakarta   \n",
       "2  Dunia Fantasi atau disebut juga Dufan adalah t...  Taman Hiburan  Jakarta   \n",
       "3  Taman Mini Indonesia Indah merupakan suatu kaw...  Taman Hiburan  Jakarta   \n",
       "4  Atlantis Water Adventure atau dikenal dengan A...  Taman Hiburan  Jakarta   \n",
       "\n",
       "    Price  Rating  Time_Minutes  \\\n",
       "0   20000     4.6          15.0   \n",
       "1       0     4.6          90.0   \n",
       "2  270000     4.6         360.0   \n",
       "3   10000     4.5           NaN   \n",
       "4   94000     4.5          60.0   \n",
       "\n",
       "                                        Coordinate       Lat        Long  \\\n",
       "0          {'lat': -6.1753924, 'lng': 106.8271528} -6.175392  106.827153   \n",
       "1  {'lat': -6.137644799999999, 'lng': 106.8171245} -6.137645  106.817125   \n",
       "2  {'lat': -6.125312399999999, 'lng': 106.8335377} -6.125312  106.833538   \n",
       "3  {'lat': -6.302445899999999, 'lng': 106.8951559} -6.302446  106.895156   \n",
       "4             {'lat': -6.12419, 'lng': 106.839134} -6.124190  106.839134   \n",
       "\n",
       "   Unnamed: 11  Unnamed: 12  \n",
       "0          NaN            1  \n",
       "1          NaN            2  \n",
       "2          NaN            3  \n",
       "3          NaN            4  \n",
       "4          NaN            5  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(r'dataset_recommendation_tourism\\tourism_with_id.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n",
       "       'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long', 'Unnamed: 11',\n",
       "       'Unnamed: 12'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n",
       "       'Rating'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = data.drop(['Time_Minutes', 'Coordinate',\n",
    "                'Lat', 'Long', 'Unnamed: 11',\n",
    "                'Unnamed: 12'], axis=1)\n",
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv('tourism_place.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 437 entries, 0 to 436\n",
      "Data columns (total 7 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   Place_Id     437 non-null    int64  \n",
      " 1   Place_Name   437 non-null    object \n",
      " 2   Description  437 non-null    object \n",
      " 3   Category     437 non-null    object \n",
      " 4   City         437 non-null    object \n",
      " 5   Price        437 non-null    int64  \n",
      " 6   Rating       437 non-null    float64\n",
      "dtypes: float64(1), int64(2), object(4)\n",
      "memory usage: 24.0+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Place_Id</th>\n",
       "      <th>Place_Name</th>\n",
       "      <th>Description</th>\n",
       "      <th>Category</th>\n",
       "      <th>City</th>\n",
       "      <th>Price</th>\n",
       "      <th>Rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Monumen Nasional</td>\n",
       "      <td>Monumen Nasional atau yang populer disingkat d...</td>\n",
       "      <td>Budaya</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>20000</td>\n",
       "      <td>4.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Kota Tua</td>\n",
       "      <td>Kota tua di Jakarta, yang juga bernama Kota Tu...</td>\n",
       "      <td>Budaya</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>0</td>\n",
       "      <td>4.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Dunia Fantasi</td>\n",
       "      <td>Dunia Fantasi atau disebut juga Dufan adalah t...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>270000</td>\n",
       "      <td>4.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Taman Mini Indonesia Indah (TMII)</td>\n",
       "      <td>Taman Mini Indonesia Indah merupakan suatu kaw...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>10000</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Atlantis Water Adventure</td>\n",
       "      <td>Atlantis Water Adventure atau dikenal dengan A...</td>\n",
       "      <td>Taman Hiburan</td>\n",
       "      <td>Jakarta</td>\n",
       "      <td>94000</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Place_Id                         Place_Name  \\\n",
       "0         1                   Monumen Nasional   \n",
       "1         2                           Kota Tua   \n",
       "2         3                      Dunia Fantasi   \n",
       "3         4  Taman Mini Indonesia Indah (TMII)   \n",
       "4         5           Atlantis Water Adventure   \n",
       "\n",
       "                                         Description       Category     City  \\\n",
       "0  Monumen Nasional atau yang populer disingkat d...         Budaya  Jakarta   \n",
       "1  Kota tua di Jakarta, yang juga bernama Kota Tu...         Budaya  Jakarta   \n",
       "2  Dunia Fantasi atau disebut juga Dufan adalah t...  Taman Hiburan  Jakarta   \n",
       "3  Taman Mini Indonesia Indah merupakan suatu kaw...  Taman Hiburan  Jakarta   \n",
       "4  Atlantis Water Adventure atau dikenal dengan A...  Taman Hiburan  Jakarta   \n",
       "\n",
       "    Price  Rating  \n",
       "0   20000     4.6  \n",
       "1       0     4.6  \n",
       "2  270000     4.6  \n",
       "3   10000     4.5  \n",
       "4   94000     4.5  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from tqdm.autonotebook import tqdm, trange\n"
     ]
    }
   ],
   "source": [
    "import mysql.connector\n",
    "from mysql.connector import Error\n",
    "import ollama\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from tqdm import tqdm\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def connect_to_database():\n",
    "    try:\n",
    "        connection = mysql.connector.connect(\n",
    "            host=\"localhost\",\n",
    "            user=\"root\",\n",
    "            password=\"admin123\",\n",
    "            database=\"tourism_destination\"\n",
    "        )\n",
    "        return connection\n",
    "    except Error as e:\n",
    "        print(f\"Error: '{e}'\")\n",
    "        return None\n",
    "    \n",
    "    # Function to check if a column exists, and add it if necessary\n",
    "def add_embedding_column_if_not_exists(cursor):\n",
    "    # Check if the 'Embedding' column exists\n",
    "    cursor.execute(\"SHOW COLUMNS FROM places LIKE 'Embedding'\")\n",
    "    result = cursor.fetchone()\n",
    "    \n",
    "    # If the 'Embedding' column does not exist, add it\n",
    "    if not result:\n",
    "        print(\"Adding 'Embedding' column to the table...\")\n",
    "        cursor.execute(\"ALTER TABLE places ADD COLUMN Embedding TEXT\")\n",
    "        print(\"'Embedding' column added.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Adding 'Embedding' column to the table...\n"
     ]
    }
   ],
   "source": [
    "connection = connect_to_database()\n",
    "cursor = connection.cursor()\n",
    "add_embedding_column_if_not_exists(cursor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Koneksi ke MySQL\n",
    "def connect_to_database():\n",
    "    try:\n",
    "        connection = mysql.connector.connect(\n",
    "            host=\"localhost\",\n",
    "            user=\"root\",\n",
    "            password=\"admin123\",\n",
    "            database=\"tourism_destination\"\n",
    "        )\n",
    "        return connection\n",
    "    except Error as e:\n",
    "        print(f\"Error: '{e}'\")\n",
    "        return None\n",
    "\n",
    "\n",
    "\n",
    "# Compute and store embeddings\n",
    "def compute_and_store_embeddings():\n",
    "    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')  \n",
    "\n",
    "    # Connect to the database\n",
    "    connection = connect_to_database()\n",
    "    if connection is None:\n",
    "        return\n",
    "    \n",
    "    cursor = connection.cursor(dictionary=True)\n",
    "    \n",
    "    # Select all places from the database\n",
    "    cursor.execute(\"SELECT Place_Id, Place_Name, Category, Description, City FROM places\")\n",
    "    places = cursor.fetchall()\n",
    "    \n",
    "    for place in places:\n",
    "        # Combine PlaceName, Category, Description, and City into one string\n",
    "        text = f\"{place['Place_Name']} {place['Category']} {place['Description']} {place['City']}\"\n",
    "        \n",
    "        # Generate embedding for the combined text\n",
    "        embedding = model.encode(text)\n",
    "        \n",
    "        # Convert embedding to a string format to store in the database\n",
    "        embedding_str = ','.join([str(x) for x in embedding])\n",
    "        \n",
    "        # Update the place in the database with the embedding\n",
    "        cursor.execute(\n",
    "            \"UPDATE places SET Embedding = %s WHERE Place_Id = %s\", \n",
    "            (embedding_str, place['Place_Id'])\n",
    "        )\n",
    "    \n",
    "    # Commit the changes and close the connection\n",
    "    connection.commit()\n",
    "    cursor.close()\n",
    "    connection.close()\n",
    "\n",
    "# Run the function to compute and store embeddings\n",
    "compute_and_store_embeddings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Top 5 Ranked Destinations:\n",
      "\n",
      "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6861)\n",
      "Wisata Batu Kuda (Rating: 4.4, Similarity Score: 0.6839)\n",
      "Gedung Agung Yogyakarta (Rating: 4.6, Similarity Score: 0.6727)\n",
      "Taman Sungai Mudal (Rating: 4.6, Similarity Score: 0.6595)\n",
      "Grand Maerakaca (Rating: 4.4, Similarity Score: 0.6581)\n"
     ]
    }
   ],
   "source": [
    "# Koneksi ke MySQL\n",
    "def query_database():\n",
    "    try:\n",
    "        connection = mysql.connector.connect(\n",
    "            host=\"localhost\",\n",
    "            user=\"root\",\n",
    "            password=\"admin123\",\n",
    "            database=\"tourism_destination\"\n",
    "        )\n",
    "\n",
    "        if connection.is_connected():\n",
    "            cursor = connection.cursor(dictionary=True)\n",
    "            sql = \"SELECT * FROM places\"\n",
    "            cursor.execute(sql)\n",
    "            results = cursor.fetchall()\n",
    "            return results\n",
    "\n",
    "    except Error as e:\n",
    "        print(f\"Error: '{e}'\")\n",
    "    \n",
    "    finally:\n",
    "        if connection.is_connected():\n",
    "            cursor.close()\n",
    "            connection.close()\n",
    "\n",
    "# Get embedding from the database and calculate cosine similarity\n",
    "def get_similar_places(user_embedding, db_results):\n",
    "    similarities = []\n",
    "    \n",
    "    for place in db_results:\n",
    "        embedding_str = place['Embedding']  # Assuming embeddings are stored as comma-separated strings in the database\n",
    "        embedding = np.array([float(x) for x in embedding_str.split(',')])  # Convert the string back to a numpy array\n",
    "        \n",
    "        # Compute cosine similarity\n",
    "        similarity = cosine_similarity([user_embedding], [embedding])[0][0]\n",
    "        similarities.append((place, similarity))\n",
    "    \n",
    "    # Sort results based on similarity and then by rating\n",
    "    ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)\n",
    "    \n",
    "    # Return top 5 places\n",
    "    return ranked_results[:5]\n",
    "\n",
    "# Ollama - Generate possible places (Retrieval Augmented Generation)\n",
    "def generate_rag_result(user_query):\n",
    "    prompt = f\"User Query: {user_query}\\n\\nPlease list 10 potential destinations based on user query:\"\n",
    "    \n",
    "    print(\"\\nGenerating results using Ollama (RAG)...\\n\")\n",
    "    with tqdm(total=10, desc=\"Processing RAG\") as pbar:\n",
    "        response = ollama.generate(model=\"llama3.1\", prompt=prompt)\n",
    "        pbar.update(5)\n",
    "    \n",
    "    # Process the response (assuming response structure is consistent)\n",
    "    print(\"Full response:\", response)\n",
    "    return response  # For now, we don't need to extract specific places, as similarity search will handle that\n",
    "\n",
    "# Main function to find the top 5 destinations\n",
    "def get_top_5_destinations(user_query):\n",
    "    # Step 1: Generate embedding for user query\n",
    "    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')\n",
    "    user_embedding = model.encode(user_query)\n",
    "    \n",
    "    # Step 2: Fetch all places from the database\n",
    "    db_results = query_database()\n",
    "    if not db_results or len(db_results) == 0:\n",
    "        print(\"No data returned from database.\")\n",
    "        return\n",
    "    \n",
    "    # Step 3: Find the most similar places\n",
    "    top_5_places = get_similar_places(user_embedding, db_results)\n",
    "    \n",
    "    # Step 4: Display top 5 destinations\n",
    "    print(\"\\nTop 5 Ranked Destinations:\\n\")\n",
    "    for place, score in top_5_places:\n",
    "        print(f\"{place['Place_Name']} (Rating: {place['Rating']}, Similarity Score: {score:.4f})\")\n",
    "\n",
    "# Example user query\n",
    "user_query = \"Saya ingin ke Jogjakarta dan saya suka dengan pemandangan alam. kemana saya harus pergi?\"\n",
    "get_top_5_destinations(user_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# prompt= \"do u know about LLM?\"\n",
    "# response = ollama.generate(model=\"llama3.1\", prompt=prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# response['response'].strip().split('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Top 5 Ranked Destinations:\n",
      "\n",
      "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6407)\n",
      "Jembatan Merah (Rating: 4.5, Similarity Score: 0.6401)\n",
      "Pasar Beringharjo (Rating: 4.5, Similarity Score: 0.6331)\n",
      "Gereja Perawan Maria Tak Berdosa Surabaya (Rating: 4.8, Similarity Score: 0.6286)\n",
      "Perpustakaan Nasional (Rating: 4.7, Similarity Score: 0.6256)\n"
     ]
    }
   ],
   "source": [
    "# Example user query\n",
    "user_query = \"Saya ingin ke Surabaya dan ingin berbelanja. kemana saya harus pergi?\"\n",
    "get_top_5_destinations(user_query)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama_venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}