Spaces:
No application file
No application file
File size: 873 Bytes
c53051e cd9ecb3 c53051e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer
# Preprocess the NIFTY option-chain CSV: fill numeric gaps, scale the price
# and volume columns to [0, 1], tokenize the categorical columns, and split
# the timestamp into separate date and time columns.

# Columns rescaled with MinMaxScaler.
NUMERIC_COLUMNS = ['open', 'high', 'low', 'close', 'volume', 'oi']
# Columns whose values are replaced by tokenizer ids.
CATEGORICAL_COLUMNS = ['Index', 'Expiry', 'OptionType']

# Load data
df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')

# Handle missing values
# Only numeric columns have a mean. numeric_only=True is required because the
# frame still contains text columns here, and pandas >= 2.0 raises TypeError
# instead of silently skipping them. Non-numeric columns keep their NaNs.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Normalize numerical columns
scaler = MinMaxScaler()
df[NUMERIC_COLUMNS] = scaler.fit_transform(df[NUMERIC_COLUMNS])

# Tokenize categorical columns
# NOTE(review): 'llama-3.1' is not a namespaced Hub repo id; confirm it
# resolves (e.g. to a local directory) or replace it with the intended
# checkpoint name.
tokenizer = AutoTokenizer.from_pretrained('llama-3.1')
for column in CATEGORICAL_COLUMNS:
    # encode() takes a single text, not a whole Series, so each distinct value
    # is encoded once and the resulting id list is mapped back onto the rows.
    # Plain id lists are stored (no return_tensors) because a batch tensor
    # cannot be assigned as a per-row column.
    # NOTE(review): astype(str) presumably turns remaining NaNs into the text
    # 'nan' before tokenizing; confirm that is acceptable for missing values.
    as_text = df[column].astype(str)
    token_ids = {value: tokenizer.encode(value) for value in as_text.unique()}
    # Rows with the same category share one list object; copy before mutating.
    df[column] = as_text.map(token_ids)

# Convert datetime columns
df['datetime'] = pd.to_datetime(df['datetime'])
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time