Spaces:
No application file
No application file
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer

# Preprocessing script for the NIFTY option-chain CSV: fill numeric gaps,
# min-max scale the numeric columns, tokenize the categorical columns and
# split the timestamp into separate date and time columns.

# Load data
df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')

# Handle missing values
# Only numeric columns have a meaningful mean. Without numeric_only=True,
# pandas >= 2.0 raises TypeError as soon as the frame contains string columns
# (here presumably 'Index', 'Expiry', 'OptionType', 'datetime').
df.fillna(df.mean(numeric_only=True), inplace=True)

# Normalize numerical columns
numeric_cols = ['open', 'high', 'low', 'close', 'volume', 'oi']
scaler = MinMaxScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Tokenize categorical columns
# NOTE(review): 'llama-3.1' does not look like a Hugging Face Hub repo id
# (those are normally 'namespace/name'); it only works if a local directory
# with that name exists -- confirm the intended checkpoint.
tokenizer = AutoTokenizer.from_pretrained('llama-3.1')
# encode() works on one text at a time, so apply it per cell and store the
# resulting list of token ids in each row. Encoding a whole Series in one call
# fails, and a single tensor could not be aligned with the rows anyway.
# astype(str) keeps non-string cells encodable; missing values in these
# columns are not filled above and therefore become the text 'nan'.
for col in ('Index', 'Expiry', 'OptionType'):
    df[col] = df[col].astype(str).map(tokenizer.encode)

# Convert datetime columns
df['datetime'] = pd.to_datetime(df['datetime'])
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time