Create app.py
app.py
ADDED
@@ -0,0 +1,25 @@
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+from transformers import AutoTokenizer
+
+# Load data
+df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')
+
+# Handle missing values in numeric columns with the column mean
+df.fillna(df.mean(numeric_only=True), inplace=True)
+
+# Normalize numerical columns
+scaler = MinMaxScaler()
+df[['open', 'high', 'low', 'close', 'volume', 'oi']] = scaler.fit_transform(df[['open', 'high', 'low', 'close', 'volume', 'oi']])
+
+# Tokenize categorical columns (encode each value to a list of token ids)
+tokenizer = AutoTokenizer.from_pretrained('llama-3.1')  # model id must match a tokenizer available on the Hugging Face Hub
+df['Index'] = df['Index'].apply(lambda x: tokenizer.encode(str(x)))
+df['Expiry'] = df['Expiry'].apply(lambda x: tokenizer.encode(str(x)))
+df['OptionType'] = df['OptionType'].apply(lambda x: tokenizer.encode(str(x)))
+
+# Convert datetime columns
+df['datetime'] = pd.to_datetime(df['datetime'])
+df['date'] = df['datetime'].dt.date
+df['time'] = df['datetime'].dt.time
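A short sanity check, assuming the CSV and the column names used above exist, can confirm the preprocessing; this snippet is an illustrative sketch and not part of the committed file:

    # Inspect the preprocessed frame produced by app.py
    print(df[['open', 'high', 'low', 'close', 'volume', 'oi']].describe())  # values should lie in [0, 1] after MinMaxScaler
    print(df[['Index', 'Expiry', 'OptionType']].head())                     # each cell should now hold a list of token ids
    print(df[['datetime', 'date', 'time']].dtypes)                          # datetime split into date and time components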