# Preprocess the NIFTY option-chain CSV: fill missing numeric values, scale
# the numeric columns to [0, 1], tokenize the categorical columns, and derive
# separate date/time columns from the timestamp.
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer

# Columns rescaled by MinMaxScaler.
NUMERIC_COLUMNS = ['open', 'high', 'low', 'close', 'volume', 'oi']
# Columns whose string labels are converted to tokenizer ids.
CATEGORICAL_COLUMNS = ['Index', 'Expiry', 'OptionType']

# Load data
df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')

# Handle missing values
# numeric_only=True is required: the frame also holds string columns, and
# pandas >= 2.0 raises TypeError when asked for the mean of those. Only the
# numeric columns are filled; missing values elsewhere are left untouched.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Normalize numerical columns
scaler = MinMaxScaler()
df[NUMERIC_COLUMNS] = scaler.fit_transform(df[NUMERIC_COLUMNS])

# Tokenize categorical columns
# NOTE(review): 'llama-3.1' must resolve to a local directory or a Hugging
# Face Hub repo id -- confirm the intended checkpoint name.
tokenizer = AutoTokenizer.from_pretrained('llama-3.1')
for column in CATEGORICAL_COLUMNS:
    # encode() works on one string at a time, not on a whole Series, so each
    # distinct label is encoded once and the result is mapped back onto the
    # rows. Plain id tuples are stored (no return_tensors) because a tensor
    # cannot be assigned as a DataFrame column.
    # astype(str) turns any remaining missing value into the label 'nan'.
    labels = df[column].astype(str)
    token_ids = {
        label: tuple(tokenizer.encode(label)) for label in labels.unique()
    }
    df[column] = labels.map(token_ids.get)

# Convert datetime columns
df['datetime'] = pd.to_datetime(df['datetime'])
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time