{"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"vscode":{"interpreter":{"hash":"57bc2b6ce032b5f0e93daa91901b7ea38a856826ef43aa9e95b6d3999f5310df"}},"kaggle":{"accelerator":"gpu","dataSources":[{"sourceId":7273989,"sourceType":"datasetVersion","datasetId":4213751}],"dockerImageVersionId":30627,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Importing the required libraries","metadata":{}},{"cell_type":"code","source":"import torch\nimport pandas as pd\nimport numpy as np\nimport os\nimport warnings\nimport matplotlib.pyplot as plt\n\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification, DistilBertForSequenceClassification, AutoModelForSeq2SeqLM\nfrom tqdm import tqdm\nfrom torchvision import models\nfrom torchvision.transforms import v2\nfrom torch.utils.data import Dataset, DataLoader\nfrom keras.preprocessing import image\nfrom torchmetrics.classification import MultilabelF1Score\nfrom sklearn.metrics import average_precision_score, ndcg_score","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:21.946543Z","iopub.execute_input":"2023-12-24T20:24:21.946928Z","iopub.status.idle":"2023-12-24T20:24:39.075031Z","shell.execute_reply.started":"2023-12-24T20:24:21.946896Z","shell.execute_reply":"2023-12-24T20:24:39.074021Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n warnings.warn(_BETA_TRANSFORMS_WARNING)\n/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n warnings.warn(_BETA_TRANSFORMS_WARNING)\n/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","output_type":"stream"}]},{"cell_type":"markdown","source":"### Setting up the environment\n***","metadata":{}},{"cell_type":"code","source":"warnings.filterwarnings(\"ignore\")","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:39.077016Z","iopub.execute_input":"2023-12-24T20:24:39.077764Z","iopub.status.idle":"2023-12-24T20:24:39.082480Z","shell.execute_reply.started":"2023-12-24T20:24:39.077728Z","shell.execute_reply":"2023-12-24T20:24:39.081472Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"markdown","source":"***","metadata":{}},{"cell_type":"markdown","source":"# Data Preprocessing","metadata":{}},{"cell_type":"code","source":"genres = [\"Crime\", \"Thriller\", \"Fantasy\", \"Horror\", \"Sci-Fi\", \"Comedy\", \"Documentary\", \"Adventure\", \"Film-Noir\", \"Animation\", \"Romance\", \"Drama\", \"Western\", \"Musical\", \"Action\", \"Mystery\", \"War\", \"Children\\'s\"]\nmapping = {}\nfor i in range(len(genres)):\n mapping[i] = genres[i]\nmapping","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:39.083738Z","iopub.execute_input":"2023-12-24T20:24:39.084396Z","iopub.status.idle":"2023-12-24T20:24:39.102308Z","shell.execute_reply.started":"2023-12-24T20:24:39.084362Z","shell.execute_reply":"2023-12-24T20:24:39.101422Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"{0: 'Crime',\n 1: 'Thriller',\n 2: 'Fantasy',\n 3: 'Horror',\n 4: 'Sci-Fi',\n 5: 'Comedy',\n 6: 'Documentary',\n 7: 'Adventure',\n 8: 'Film-Noir',\n 9: 'Animation',\n 10: 'Romance',\n 11: 'Drama',\n 12: 'Western',\n 13: 'Musical',\n 14: 'Action',\n 15: 'Mystery',\n 16: 'War',\n 17: \"Children's\"}"},"metadata":{}}]},{"cell_type":"markdown","source":"***","metadata":{}},{"cell_type":"code","source":"trainset = pd.read_csv('/kaggle/input/ml-dataset-2023s1/trainset.csv')\ntestset = pd.read_csv('/kaggle/input/ml-dataset-2023s1/testset.csv')\ntrainset.label = trainset.label.apply(lambda x: eval(x))\ntestset.label = testset.label.apply(lambda x: eval(x))\ntrainset.img_path = trainset.img_path.apply(lambda x: x.replace('\\\\', '/'))\ntestset.img_path = testset.img_path.apply(lambda x: x.replace('\\\\', '/'))","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:39.104625Z","iopub.execute_input":"2023-12-24T20:24:39.104937Z","iopub.status.idle":"2023-12-24T20:24:39.286604Z","shell.execute_reply.started":"2023-12-24T20:24:39.104903Z","shell.execute_reply":"2023-12-24T20:24:39.285646Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"print(len(trainset), len(testset))","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:39.287752Z","iopub.execute_input":"2023-12-24T20:24:39.288008Z","iopub.status.idle":"2023-12-24T20:24:39.292769Z","shell.execute_reply.started":"2023-12-24T20:24:39.287985Z","shell.execute_reply":"2023-12-24T20:24:39.291911Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stdout","text":"3106 777\n","output_type":"stream"}]},{"cell_type":"code","source":"tokenizer_gen = AutoTokenizer.from_pretrained(\"MBZUAI/LaMini-Flan-T5-248M\")\nmodel_gen = AutoModelForSeq2SeqLM.from_pretrained(\"MBZUAI/LaMini-Flan-T5-248M\")","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:39.294043Z","iopub.execute_input":"2023-12-24T20:24:39.294392Z","iopub.status.idle":"2023-12-24T20:24:50.312836Z","shell.execute_reply.started":"2023-12-24T20:24:39.294360Z","shell.execute_reply":"2023-12-24T20:24:50.311944Z"},"trusted":true},"execution_count":6,"outputs":[{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/2.50k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e9e15dbfbbdb420887c6c99f416b98d5"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"spiece.model: 0%| | 0.00/792k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"38809e386306424c9db375f5c63ddd1b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/2.42M [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9f4a77264da7421fb65eb048ec48ed38"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json: 0%| | 0.00/2.20k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"54e22652f35644a5b9c3667a8b9757c7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/1.53k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"31dbd98619574f6b826fbbd0bf4dd60a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model.bin: 0%| | 0.00/990M [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2fe28915a8cb47ce92db01be9e53e62c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json: 0%| | 0.00/142 [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"07ca22d143154c23a743975118441d89"}},"metadata":{}}]},{"cell_type":"code","source":"def generate_plot(df: pd.DataFrame, model: AutoModelForSeq2SeqLM, tokenizer: AutoTokenizer, device) -> pd.DataFrame:\n quote = 'What is the story of the movie {}?'\n model_gen.to(device)\n model_gen.eval()\n\n for i in tqdm(range(len(df))):\n with torch.no_grad():\n input_ids = tokenizer(quote.format(df.title[i]), return_tensors='pt').input_ids.to(device)\n output = model.generate(input_ids, max_length=256, do_sample=True, temperature=0.09)\n df.loc[i, 'plot'] = tokenizer.decode(output[0], skip_special_tokens=True)\n return df","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:50.313910Z","iopub.execute_input":"2023-12-24T20:24:50.314190Z","iopub.status.idle":"2023-12-24T20:24:50.321223Z","shell.execute_reply.started":"2023-12-24T20:24:50.314164Z","shell.execute_reply":"2023-12-24T20:24:50.320122Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:50.322307Z","iopub.execute_input":"2023-12-24T20:24:50.322558Z","iopub.status.idle":"2023-12-24T20:24:50.358375Z","shell.execute_reply.started":"2023-12-24T20:24:50.322534Z","shell.execute_reply":"2023-12-24T20:24:50.357298Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"# trainset = generate_plot(trainset, model_gen, tokenizer_gen, device)\n# testset = generate_plot(testset, model_gen, tokenizer_gen, device)","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:50.359654Z","iopub.execute_input":"2023-12-24T20:24:50.359944Z","iopub.status.idle":"2023-12-24T20:24:50.369094Z","shell.execute_reply.started":"2023-12-24T20:24:50.359918Z","shell.execute_reply":"2023-12-24T20:24:50.368105Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"markdown","source":"# Model Implementation","metadata":{}},{"cell_type":"markdown","source":"### Sub-models\n***","metadata":{}},{"cell_type":"code","source":"tokenizer1 = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\nmodel1 = DistilBertForSequenceClassification .from_pretrained(\"distilbert-base-uncased\", problem_type=\"multi_label_classification\", num_labels=18)\nmodel1.config.id2label = mapping\n\ntokenizer2 = AutoTokenizer.from_pretrained(\"dduy193/plot-classification\")\nmodel2 = AutoModelForSequenceClassification.from_pretrained(\"dduy193/plot-classification\")\nmodel2.config.id2label = mapping\n\nmodel3 = models.resnet101(pretrained=False)\nmodel3.fc = torch.nn.Linear(2048, len(genres))\n\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nmodel1.to(device)\nmodel2.to(device)\nmodel3.to(device)\ndevice","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:24:50.372505Z","iopub.execute_input":"2023-12-24T20:24:50.372779Z","iopub.status.idle":"2023-12-24T20:25:02.893172Z","shell.execute_reply.started":"2023-12-24T20:24:50.372756Z","shell.execute_reply":"2023-12-24T20:25:02.892101Z"},"trusted":true},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/28.0 [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2060918e07ea4f02ade0210bb57c6fcb"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/483 [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"38dc266d2e474345a7ae8bd86b65f803"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1110cd2434e54504817ca4be7488b161"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/466k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e6a8340f64e44489b049319c7c81363c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/268M [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9ace0fba2fe546ce8576aca2437795eb"}},"metadata":{}},{"name":"stderr","text":"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.weight', 'pre_classifier.bias']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/1.20k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4eaed633fafe4d758cb8675999e5104c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3a5e902fcbda4fa09f1e5a5dfa9dc289"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/712k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"33043dabaa4447ac8824784c6d568627"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fd054639c5d7483da861657f0f8c03ca"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/1.36k [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"06725a0c49444e2eaf4fb5b39b44711c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/268M [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"04521f8014a347779a3f746aa38247bc"}},"metadata":{}},{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"device(type='cuda')"},"metadata":{}}]},{"cell_type":"markdown","source":"### Deep Fusion Multimodal Model\n***","metadata":{}},{"cell_type":"code","source":"class Multimodal(torch.nn.Module):\n def __init__(self, model1, model2, model3):\n super().__init__()\n self.model1 = model1\n self.model2 = model2\n self.model3 = model3\n self.fc1 = torch.nn.Linear(18, 18)\n self.fc2 = torch.nn.Linear(18, 18)\n self.fc3 = torch.nn.Linear(18, 18)\n\n def forward(self, \n title_input_ids, title_attention_mask,\n plot_input_ids, plot_attention_mask,\n image_input):\n title_output = self.model1(title_input_ids, title_attention_mask)\n plot_output = self.model2(plot_input_ids, plot_attention_mask)\n image_output = self.model3(image_input)\n\n title_output = self.fc1(title_output.logits)\n plot_output = self.fc2(plot_output.logits)\n image_output = self.fc3(image_output)\n \n output = torch.add(title_output, plot_output)\n output = torch.add(output, image_output)\n return output","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:25:02.894461Z","iopub.execute_input":"2023-12-24T20:25:02.894756Z","iopub.status.idle":"2023-12-24T20:25:02.903181Z","shell.execute_reply.started":"2023-12-24T20:25:02.894730Z","shell.execute_reply":"2023-12-24T20:25:02.901879Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"markdown","source":"# Custom Datasets & Data Loaders","metadata":{}},{"cell_type":"markdown","source":"***\n### Custom Dataset\n***","metadata":{}},{"cell_type":"code","source":"class Poroset(torch.utils.data.Dataset):\n def __init__(self, df, \n tokenizer1, tokenizer2, \n max_len1=64, max_len2=256,\n device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')):\n self.df = df\n self.tokenizer1 = tokenizer1\n self.tokenizer2 = tokenizer2\n self.max_len1 = max_len1\n self.max_len2 = max_len2\n self.device = device\n self.transform = v2.Compose([\n v2.Resize((224, 224)),\n v2.ToTensor(),\n v2.Normalize(mean=[0.485, 0.456, 0.406],\n std=[0.229, 0.224, 0.225])\n ])\n\n def __len__(self):\n return len(self.df)\n \n def __getitem__(self, idx):\n row = self.df.iloc[idx]\n \n title = row['title']\n # Truncate title if it is too long\n if len(title) > self.max_len1:\n title = title[:self.max_len1]\n\n plot = row['plot']\n # Truncate plot if it is too long\n if len(plot) > self.max_len2:\n plot = plot[:self.max_len2]\n\n label = row['label']\n title_encoding = self.tokenizer1(title, truncation=True, padding='max_length', max_length=self.max_len1, return_tensors='pt')\n plot_encoding = self.tokenizer2(plot, truncation=True, padding='max_length', max_length=self.max_len2, return_tensors='pt')\n \n image_path = '/kaggle/input/ml-dataset-2023s1/ml1m/' + row['img_path']\n if os.path.exists(image_path):\n image_input = image.load_img(image_path)\n image_input = self.transform(image_input)\n else:\n image_input = torch.zeros((3, 224, 224))\n \n return {\n 'title': title,\n 'plot': plot,\n 'title_input_ids': title_encoding['input_ids'].squeeze(),\n 'title_attention_mask': title_encoding['attention_mask'].squeeze(),\n 'plot_input_ids': plot_encoding['input_ids'].squeeze(),\n 'plot_attention_mask': plot_encoding['attention_mask'].squeeze(),\n 'image_input': image_input,\n 'label': torch.FloatTensor(label)\n }","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:25:02.904483Z","iopub.execute_input":"2023-12-24T20:25:02.904756Z","iopub.status.idle":"2023-12-24T20:25:02.920222Z","shell.execute_reply.started":"2023-12-24T20:25:02.904732Z","shell.execute_reply":"2023-12-24T20:25:02.919401Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"trainset.head()","metadata":{"execution":{"iopub.status.busy":"2023-12-24T20:25:02.921249Z","iopub.execute_input":"2023-12-24T20:25:02.921523Z","iopub.status.idle":"2023-12-24T20:25:02.948147Z","shell.execute_reply.started":"2023-12-24T20:25:02.921499Z","shell.execute_reply":"2023-12-24T20:25:02.947223Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":" title img_path \\\n0 Washington Square (1997) ml1m/content/dataset/ml1m-images/1650.jpg \n1 Net, The (1995) ml1m/content/dataset/ml1m-images/185.jpg \n2 Batman Returns (1992) ml1m/content/dataset/ml1m-images/1377.jpg \n3 Boys from Brazil, The (1978) ml1m/content/dataset/ml1m-images/3204.jpg \n4 Dear Jesse (1997) ml1m/content/dataset/ml1m-images/1901.jpg \n\n label \\\n0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ... \n1 [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n2 [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, ... \n3 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n4 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ... \n\n plot \n0 Washington Square is a 1997 American film abou... \n1 Net is a 1995 American film directed by James ... \n2 Batman returns to the Batman universe after a ... \n3 The movie Boys from Brazil, The (1978) is a ro... \n4 Dear Jesse is a 1997 American drama film about... ","text/html":"
\n | title | \nimg_path | \nlabel | \nplot | \n
---|---|---|---|---|
0 | \nWashington Square (1997) | \nml1m/content/dataset/ml1m-images/1650.jpg | \n[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ... | \nWashington Square is a 1997 American film abou... | \n
1 | \nNet, The (1995) | \nml1m/content/dataset/ml1m-images/185.jpg | \n[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \nNet is a 1995 American film directed by James ... | \n
2 | \nBatman Returns (1992) | \nml1m/content/dataset/ml1m-images/1377.jpg | \n[1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, ... | \nBatman returns to the Batman universe after a ... | \n
3 | \nBoys from Brazil, The (1978) | \nml1m/content/dataset/ml1m-images/3204.jpg | \n[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \nThe movie Boys from Brazil, The (1978) is a ro... | \n
4 | \nDear Jesse (1997) | \nml1m/content/dataset/ml1m-images/1901.jpg | \n[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ... | \nDear Jesse is a 1997 American drama film about... | \n