File size: 873 Bytes
c53051e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd9ecb3
c53051e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer

# Load data
df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')

# Handle missing values: fill gaps in each numeric column with that column's
# mean. numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises TypeError when the frame contains string columns.
# Columns that have no numeric mean are left unfilled.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Normalize numerical columns: min-max scale each one independently using
# MinMaxScaler's default settings, writing the result back in place.
numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'oi']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[numeric_columns])
df[numeric_columns] = scaled_values

# Tokenize categorical columns
# NOTE(review): 'llama-3.1' is not a namespaced Hub repo id (those look like
# 'org/model-name'); confirm it resolves to a local directory or replace it
# with the intended checkpoint.
tokenizer = AutoTokenizer.from_pretrained('llama-3.1')
for column in ('Index', 'Expiry', 'OptionType'):
    # tokenizer.encode() handles one text at a time, so it cannot be given a
    # whole Series. Encode each distinct value once and map the resulting
    # token-id lists back onto the rows.
    token_ids = {
        value: tokenizer.encode(str(value))
        for value in df[column].dropna().unique()
    }
    # na_action='ignore' keeps missing entries as NaN instead of tokenizing
    # the literal text "nan".
    df[column] = df[column].map(token_ids.get, na_action='ignore')
 
# Convert datetime columns: parse the raw 'datetime' column once, store the
# parsed values back, and derive separate 'date' and 'time' columns from them.
timestamps = pd.to_datetime(df['datetime'])
df['datetime'] = timestamps
df['date'] = timestamps.dt.date
df['time'] = timestamps.dt.time