{ "cells": [ { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import requests \n", "import datetime as dt\n", "import re\n", "import json\n", "from tqdm import tqdm\n", "import os\n", "import time\n", "from openai import OpenAI" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Calculate" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "if \"OPENAI_API_KEY\" not in os.environ:\n", " with open('/home/sagemaker-user/Sciences-POC/config/secrets/keys.txt', 'r') as f:\n", " keys = json.loads(f.read())\n", "else : \n", " keys=os.environ" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "save_path = 'data/outputs'\n", "content_path = 'data/extract_sciences_po'\n", "\n", "\n", "def retrieve_classifications(name, mapping_prompt):\n", "\n", " df = pd.read_csv('data/extract_sciences_po.csv')\n", "\n", " if os.path.exists(f\"{save_path}/output_{name}.txt\"):\n", " with open(f\"{save_path}/output_{name}.txt\", 'r') as f : \n", " out_dict = json.loads(f.read())\n", " out_df = pd.DataFrame.from_dict(out_dict)\n", " out = out_dict\n", " else : \n", " out_df = pd.DataFrame(columns = ['item_id', 'categorie_principale', 'categorie_secondaire'])\n", " out = []\n", "\n", " df_to_process = df.loc[~df.item_id.isin(out_df.item_id)]\n", "\n", " if mapping_prompt[name]['client']=='deepseek':\n", " client = OpenAI(api_key=keys[\"DEEPSEEK_API_KEY\"], base_url=\"https://api.deepseek.com\")\n", " model=\"deepseek-chat\"\n", " else:\n", " client=OpenAI(api_key=\"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\")\n", " model=\"gpt-4o-mini\"\n", "\n", " with open(mapping_prompt[name]['path_prompt'], 'r') as f:\n", " prompt = f.read()\n", "\n", " if mapping_prompt[name]['client']=='openai-assistant':\n", " \n", " assistant = client.beta.assistants.create(\n", " name=\"News classifier\",\n", " instructions=prompt,\n", " response_format={ \"type\": \"json_object\"},\n", " model=\"gpt-4o-mini\",\n", " )\n", "\n", " assistant_id = assistant.id #mapping_prompt[name]['assistant_id']\n", "\n", " with tqdm(total=df_to_process.shape[0]) as pbar:\n", "\n", " for i, row in df_to_process.iterrows():\n", " titre_brut = f\"{row.item_id}_\"+row.titre.lower().strip().replace(f\"\\xa0\", ' ').replace(' : ', ':').replace(' ', '_').replace('/', '')\n", " \n", " with open(f'{content_path}/{titre_brut}.txt', 'r') as f:\n", " text = f.read()\n", "\n", " if mapping_prompt[name]['client']=='openai-assistant':\n", " \n", " # Step 1: Create a thread\n", " thread = client.beta.threads.create()\n", "\n", " # Step 2: Add a user message\n", " client.beta.threads.messages.create(\n", " thread_id=thread.id,\n", " role=\"user\",\n", " content=text\n", " )\n", "\n", " # Step 3: Run the assistant\n", " run = client.beta.threads.runs.create(\n", " thread_id=thread.id,\n", " assistant_id=assistant_id,\n", " )\n", "\n", " # Step 4: Wait for completion\n", " while True:\n", " run = client.beta.threads.runs.retrieve(\n", " thread_id=thread.id,\n", " run_id=run.id,\n", " )\n", " if run.status == \"completed\":\n", " break\n", " elif run.status in [\"failed\", \"cancelled\", \"expired\"]:\n", " raise Exception(f\"\"\"Run failed with status: {run.status}\\n\n", " {run}\"\"\")\n", " time.sleep(1)\n", "\n", " # Step 5: Get last assistant message only\n", " messages = client.beta.threads.messages.list(thread_id=thread.id)\n", " assistant_messages = [m for m in messages.data if m.role == \"assistant\"]\n", "\n", " if assistant_messages:\n", " # Get the most recent assistant message\n", " latest = assistant_messages[0]\n", " content = latest.content[0].text.value \n", " \n", " else:\n", " messages = [{\"role\": \"system\", \"content\": prompt},\n", " {\"role\": \"user\", \"content\": text}]\n", "\n", " response = client.chat.completions.create(\n", " model=model,\n", " messages=messages,\n", " response_format={\n", " 'type': 'json_object'\n", " }\n", " )\n", " content = response.choices[0].message.content\n", " try : \n", " cat_json = json.loads(content)\n", "\n", " out.append({\n", " 'item_id':row.item_id, \n", " 'categorie_principale': cat_json['categorie_principale'],\n", " 'categorie_secondaire': cat_json['categorie_secondaire'],\n", " })\n", " \n", " with open(f'{save_path}/output_{name}.txt', 'w+') as f : \n", " f.write(json.dumps(out))\n", "\n", " except Exception as e : \n", " print(f'Error with article {row.item_id}')\n", " pass\n", "\n", " \n", " pbar.update(1)\n", "\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dimanov_et_al\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 6%|▌ | 29/509 [03:56<1:21:08, 10.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article F5TBC6SGHRGRFJGZXZYG73I2C4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 13%|█▎ | 65/509 [09:58<1:19:31, 10.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article PCZQ5Q2SHJFHZANPXZW2CUQVWU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 15%|█▍ | 75/509 [10:54<39:59, 5.53s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article KCFRPP6YOJEMZMUEP55JMHKPCI\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 16%|█▌ | 81/509 [11:28<39:46, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article AQ3WGXNGMFC7TF5NO73LD2AQBY\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 17%|█▋ | 87/509 [12:09<46:30, 6.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article KAO3B4ZMY5HP3NMHSSORBBHEVQ\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 20%|█▉ | 101/509 [14:32<2:06:30, 18.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article OKZW3KTDFNHTDIOZSJTUUWTECM\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 21%|██ | 106/509 [15:06<58:56, 8.78s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article UBNN4MNV35BCVICDFZVGQEAPYU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 21%|██ | 108/509 [15:21<53:29, 8.00s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article VWT2DL4B2RESPHBN5GLSRSKCXA\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 21%|██▏ | 109/509 [15:26<46:12, 6.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article NE7X4E4S7BGCVN42KSQHYGPR5M\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 28%|██▊ | 140/509 [20:16<43:06, 7.01s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article 7MSP4B5UWFDURJY2ZA4E26D6Y4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 28%|██▊ | 141/509 [20:21<39:19, 6.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article 5GQ4EE3DBZBJ3FOP2J3FHRAW6Y\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 35%|███▌ | 180/509 [25:47<45:26, 8.29s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article FOMV5KH4UZBFNJHHOTLIRNBTNA\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 36%|███▌ | 182/509 [25:58<36:52, 6.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article KXP2DEGANVHOXNRBFFU2X44KME\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 39%|███▊ | 197/509 [28:03<50:13, 9.66s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article HIY7OM5AGFF7NAWUFKDSFKBIQU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 40%|███▉ | 202/509 [29:15<1:19:59, 15.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article GK5PBHMGJNGH7IC6TRV7BPISRI\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 46%|████▌ | 235/509 [33:50<26:15, 5.75s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article WHQXZPRU4ZGNHHFODI5AF4QREQ\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 48%|████▊ | 245/509 [35:12<39:31, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article JMNDPKJDOBGDPOVOT3B2TMEI3Q\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 53%|█████▎ | 272/509 [40:57<28:50, 7.30s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article OY5WOZYJJFCUXAY2IP3MDF5DBI\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 54%|█████▍ | 276/509 [41:21<24:21, 6.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article 4UXLV4RIYRGI3LLOJ4VIFIS3PU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 56%|█████▌ | 286/509 [43:40<43:09, 11.61s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article BDUEDA6Q5VFA5JVZUYKANSBEJU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 58%|█████▊ | 297/509 [45:06<30:24, 8.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article LTW4AQF5ZJFA5IYGRMTYK5KYYA\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 74%|███████▎ | 375/509 [56:16<19:53, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article TYDKDANLIVFMXMGT6QNNLZXFT4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 74%|███████▍ | 376/509 [56:22<17:48, 8.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article B62VZRCINRBWTI2ZP5KSJCALQY\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 76%|███████▋ | 389/509 [58:22<17:08, 8.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article NEIWXLDUOBFG5P7N3QJFXKRWKU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 77%|███████▋ | 391/509 [58:38<15:53, 8.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article Y4SGMAXQGJFA3EIXQM2P4ULYAA\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 77%|███████▋ | 392/509 [58:49<17:23, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article 2GHECSGTINFFLLDNFB6GWBWT74\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 82%|████████▏ | 416/509 [1:03:07<16:25, 10.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article A76MZTTKFFDNTEBAHEFQR3YMWA\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 82%|████████▏ | 418/509 [1:03:34<17:11, 11.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article RVAAO6SOURFWXLYT5UGAZQMFDI\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 84%|████████▎ | 426/509 [1:04:50<14:42, 10.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article ZPJYQHA4YZA7XJVVYINHDXY52Y\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 84%|████████▍ | 429/509 [1:06:26<32:25, 24.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article HQWSHI5H3ZFWRC6CR3EGK2CWOU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 85%|████████▍ | 431/509 [1:06:40<19:53, 15.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article UJCAG7SOBRC4DB4GD3YRYKHYJE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 91%|█████████ | 462/509 [1:13:31<10:16, 13.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article CSVWJ7KVPBHLPH4LGTSWPYA5IE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 95%|█████████▌| 484/509 [1:17:49<04:09, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article BQ6E3KG74ZFQPEHRYVAUUDLTRY\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 97%|█████████▋| 492/509 [1:18:53<01:56, 6.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article VC2YC2LPWRA2ZGM6DM3JWZKVHY\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 99%|█████████▉| 505/509 [1:21:06<00:41, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Error with article AOT254SA2VDIDNF4YW7XPLWJ5E\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 509/509 [1:22:17<00:00, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "sans_titre_1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "ename": "KeyError", "evalue": "'DEEPSEEK_API_KEY'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[28], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(name)\n\u001b[0;32m----> 6\u001b[0m \u001b[43mretrieve_classifications\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapping\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[27], line 21\u001b[0m, in \u001b[0;36mretrieve_classifications\u001b[0;34m(name, mapping_prompt)\u001b[0m\n\u001b[1;32m 18\u001b[0m df_to_process \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;241m~\u001b[39mdf\u001b[38;5;241m.\u001b[39mitem_id\u001b[38;5;241m.\u001b[39misin(out_df\u001b[38;5;241m.\u001b[39mitem_id)]\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mapping_prompt[name][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclient\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdeepseek\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m---> 21\u001b[0m client \u001b[38;5;241m=\u001b[39m OpenAI(api_key\u001b[38;5;241m=\u001b[39m\u001b[43mkeys\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDEEPSEEK_API_KEY\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m, base_url\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://api.deepseek.com\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 22\u001b[0m model\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdeepseek-chat\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[0;32m:679\u001b[0m, in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'DEEPSEEK_API_KEY'" ] } ], "source": [ "with open('config/mapping_prompts.txt', 'r') as f : \n", " mapping = json.loads(f.read())\n", "\n", "for name in mapping.keys():\n", " print(name)\n", " retrieve_classifications(name, mapping)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "articles = pd.read_csv('data/extract_sciences_po.csv')\n", "\n", "with open(\"data/outputs/output_favarel_et_al.txt\", 'r') as f : \n", " out_dict = json.loads(f.read())\n", "\n", "\n", "df = pd.DataFrame.from_dict(out_dict)\n", "\n", "articles = pd.merge(df, articles, on='item_id', how='left')\n", "\n", "count_principale = df.groupby('categorie_principale').item_id.count()\n", "df['categorie_secondaire'] = df.apply(lambda x : x.categorie_secondaire.split(',')[0] if x.categorie_secondaire!=None else None, axis=1)\n", "\n", " " ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "index", "rawType": "int64", "type": "integer" }, { "name": "item_id", "rawType": "object", "type": "string" }, { "name": "categorie_principale", "rawType": "object", "type": "string" }, { "name": "categorie_secondaire", "rawType": "object", "type": "string" } ], "conversionMethod": "pd.DataFrame", "ref": "224a4e83-124d-4710-9d6a-6deb122e17de", "rows": [ [ "0", "I4OEKQ6MHRBP3LQVVYDDXW6T6U", "UPDATE ME", "EDUCATE ME" ], [ "1", "LVA4MZBQOBECNPZD323NV6O7K4", "INSPIRE ME", "EDUCATE ME" ], [ "2", "4FAEHUUZ5ZFAJKLFEV2LT5CBAQ", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "3", "4S4G6BKFRNER3LB22CLPAEWWKY", "GIVE ME PERSPECTIVE", "INSPIRE ME" ], [ "4", "ZAFHRNAHJVC6THXRSBMCB4A24I", "INSPIRE ME", "EDUCATE ME" ], [ "5", "ESEADAHDJVGQRC2S3JHSZEVWHM", "UPDATE ME", "GIVE ME PERSPECTIVE" ], [ "6", "FZHZS32EOZG5FAK7IKOJIB3J4I", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "7", "H4JJGHBTL5DDFNNNETBE5T7FLI", "UPDATE ME", "EDUCATE ME" ], [ "8", "JTND25UK2NA6HL2DVJRX46VCVM", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "9", "LFRXBXICUFFULNKZUG4JN4E2NY", "INSPIRE ME", "EDUCATE ME" ], [ "10", "O5546IGISVGOLAPLOUIXAHXATE", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "11", "5LRGJN3CONBZ3OIGARTGZX7X34", "UPDATE ME", "GIVE ME PERSPECTIVE" ], [ "12", "A2CXALUPDVHDZP22PBL4YMMGVU", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "13", "6T5ESGV6CNFA5L3TIEO475EO2I", "GIVE ME PERSPECTIVE", "EDUCATE ME" ], [ "14", "KGJGC5V6OJCJJEVO4D3MP5QA4Q", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "15", "ASBHDD73HZANDM5W3WRKXGCCGI", "KEEP ME ON TREND", "EDUCATE ME" ], [ "16", "YL7UVGJLQZALVK5PCV2LZLPTDQ", "INSPIRE ME", "ENTERTAIN ME" ], [ "17", "2T6XZA7OHRD6RFCGPA3ZVCNJ7A", "GIVE ME PERSPECTIVE", "EDUCATE ME" ], [ "18", "V3THEIGVXBDADLDXUKJZTRLLUQ", "UPDATE ME", "GIVE ME PERSPECTIVE" ], [ "19", "Z3WQ6BXPKRATPCGSKMROOOMQ44", "UPDATE ME", "EDUCATE ME" ], [ "20", "TUR55MJTKRA7TNJOOP62XW7NIM", "INSPIRE ME", "EDUCATE ME" ], [ "21", "YGI5VTCGIJFDNOA5C4G5BOITH4", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "22", "S2FVMMUVK5BS3HK5YN7PNU2LZE", "UPDATE ME", "GIVE ME PERSPECTIVE" ], [ "23", "ZASS6FXGGVGCHFSSZKZXQ4AG2U", "EDUCATE ME", "INSPIRE ME" ], [ "24", "2AZTSRRBUNBHNNHU5DCMRGG6WY", "GIVE ME PERSPECTIVE", "EDUCATE ME" ], [ "25", "AKDZ7PAIQVBN5OENKR5AQ6YGNI", "KEEP ME ON TREND", "ENTERTAIN ME" ], [ "26", "M4RJIWTMK5BVPAWNFEHGXZSLRY", "INSPIRE ME", "ENTERTAIN ME" ], [ "27", "WLTP7ZAZ7JFUDJG2LNO5E7APZA", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "28", "DCY4O24FUBAZZB3KOTIMMBA6MY", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "29", "FHFB7Z3XYBH25LO7XDGHAEQ7N4", "UPDATE ME", "EDUCATE ME" ], [ "30", "BPNRZFZPAVA3HPUOXUKXEUR5RY", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "31", "NWGRYEOIQ5ADRDRXYZCU5C3VDA", "INSPIRE ME", "EDUCATE ME" ], [ "32", "GORDMTE47FA5FEUA3U76M5VPOY", "EDUCATE ME", "UPDATE ME" ], [ "33", "XFBX7NDGN5CMPL3OU5ZXFF6YCE", "UPDATE ME", "EDUCATE ME" ], [ "34", "VYEPVCBK4ZCV7D3SW35GZ2Y5DM", "EDUCATE ME", "INSPIRE ME" ], [ "35", "F5TBC6SGHRGRFJGZXZYG73I2C4", "EDUCATE ME", "UPDATE ME" ], [ "36", "ZFZG6QOFBRFE3BL4VCI72HFQKQ", "GIVE ME PERSPECTIVE", "EDUCATE ME" ], [ "37", "ZGPFATVT4ZFJZMX7LDXE3VS7RM", "INSPIRE ME", "GIVE ME PERSPECTIVE" ], [ "38", "UBVFCLZ6OZCP5DAAYCBW2CDPJI", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "39", "WYEBGIAY5JAUFLB4GH6IXVYP34", "UPDATE ME", "EDUCATE ME" ], [ "40", "DXW7WX7ALVFBZNPFU6QKHEH3G4", "UPDATE ME", "EDUCATE ME" ], [ "41", "QOFNZRU2TVES7GMVVPMZ5LSI2E", "EDUCATE ME", "INSPIRE ME" ], [ "42", "WBJI7FFINZHCPC5QZL2NFY6XWE", "INSPIRE ME", "EDUCATE ME" ], [ "43", "IYAAHFU3PVFOPAN6XIJDPQ5XLQ", "UPDATE ME", "EDUCATE ME" ], [ "44", "SNYUYV4G5BAODBTQWJBRG5JZKM", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "45", "PMBTTRISTNDN3P3ANDECQKSEDE", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "46", "UHYPJFKFJNHVPHVRFV7OHNDU64", "GIVE ME PERSPECTIVE", "EDUCATE ME" ], [ "47", "ESRPEZYCA5AG7HIXUIVMPHDFVQ", "EDUCATE ME", "GIVE ME PERSPECTIVE" ], [ "48", "HQW7TUOY7NHUVMCLNSDSEY5X4I", "KEEP ME ON TREND", "EDUCATE ME" ], [ "49", "C3BA2XUBXRG5RBNHJUSDUKP4RI", "ENTERTAIN ME", "KEEP ME ON TREND" ] ], "shape": { "columns": 3, "rows": 516 } }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item_idcategorie_principalecategorie_secondaire
0I4OEKQ6MHRBP3LQVVYDDXW6T6UUPDATE MEEDUCATE ME
1LVA4MZBQOBECNPZD323NV6O7K4INSPIRE MEEDUCATE ME
24FAEHUUZ5ZFAJKLFEV2LT5CBAQEDUCATE MEGIVE ME PERSPECTIVE
34S4G6BKFRNER3LB22CLPAEWWKYGIVE ME PERSPECTIVEINSPIRE ME
4ZAFHRNAHJVC6THXRSBMCB4A24IINSPIRE MEEDUCATE ME
............
511AOT254SA2VDIDNF4YW7XPLWJ5EINSPIRE MEENTERTAIN ME
512GUOUKHLPFZBK7GVR5XU7MXVD5AINSPIRE MEEDUCATE ME
5135HT6C24ZBVDOBFXPLA4HNVOTT4EDUCATE MEUPDATE ME
514VLV6RSQ6U5E6XJ6AIRV26AEKO4UPDATE MEEDUCATE ME
515FVCJ6DQ5HVDNDGC4F6F276NVFMUPDATE MEGIVE ME PERSPECTIVE
\n", "

516 rows × 3 columns

\n", "
" ], "text/plain": [ " item_id categorie_principale categorie_secondaire\n", "0 I4OEKQ6MHRBP3LQVVYDDXW6T6U UPDATE ME EDUCATE ME\n", "1 LVA4MZBQOBECNPZD323NV6O7K4 INSPIRE ME EDUCATE ME\n", "2 4FAEHUUZ5ZFAJKLFEV2LT5CBAQ EDUCATE ME GIVE ME PERSPECTIVE\n", "3 4S4G6BKFRNER3LB22CLPAEWWKY GIVE ME PERSPECTIVE INSPIRE ME\n", "4 ZAFHRNAHJVC6THXRSBMCB4A24I INSPIRE ME EDUCATE ME\n", ".. ... ... ...\n", "511 AOT254SA2VDIDNF4YW7XPLWJ5E INSPIRE ME ENTERTAIN ME\n", "512 GUOUKHLPFZBK7GVR5XU7MXVD5A INSPIRE ME EDUCATE ME\n", "513 5HT6C24ZBVDOBFXPLA4HNVOTT4 EDUCATE ME UPDATE ME\n", "514 VLV6RSQ6U5E6XJ6AIRV26AEKO4 UPDATE ME EDUCATE ME\n", "515 FVCJ6DQ5HVDNDGC4F6F276NVFM UPDATE ME GIVE ME PERSPECTIVE\n", "\n", "[516 rows x 3 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Ajouter images" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 2 }