Avinash109 committed
Commit 3776d99 · verified · 1 Parent(s): cc470ed

Update app.py

Files changed (1): app.py (+86, -35)
app.py CHANGED
@@ -1,50 +1,101 @@
  import pandas as pd
- from transformers import LLaMAForSequenceClassification, LLaMATokenizer

  # Load the data
  data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')

- # Preprocess the data
- tokenizer = LLaMATokenizer.from_pretrained('llama-2-7b')
- model = LLaMAForSequenceClassification.from_pretrained('llama-2-7b', num_labels=2)

- # Fine-tune the model on the dataset
- train_texts, val_texts, train_labels, val_labels = train_test_split(data['text'], data['label'], test_size=0.2, random_state=42)

- train_encodings = tokenizer(train_texts, truncation=True, padding=True)
- val_encodings = tokenizer(val_texts, truncation=True, padding=True)

- train_dataset = Dataset(train_encodings, train_labels)
- val_dataset = Dataset(val_encodings, val_labels)

- training_args = TrainingArguments(
-     output_dir='./results',          # output directory
-     num_train_epochs=3,              # total # of training epochs
-     per_device_train_batch_size=16,  # batch size per device during training
-     per_device_eval_batch_size=64,   # batch size for evaluation
-     warmup_steps=500,                # number of warmup steps for learning rate scheduler
-     weight_decay=0.01,               # strength of weight decay
-     logging_dir='./logs',            # directory for storing logs
- )

- trainer = Trainer(
-     model=model,                     # the instantiated model
-     args=training_args,              # training arguments
-     train_dataset=train_dataset,     # training dataset
-     eval_dataset=val_dataset         # evaluation dataset
- )

- trainer.train()

- # Use the fine-tuned model to generate strategies
  def generate_strategies(data):
-     inputs = tokenizer(data['text'], return_tensors='pt')
-     outputs = model(**inputs)
-     logits = outputs.logits
-     strategies = torch.argmax(logits, dim=1)
-     return strategies

  strategies = generate_strategies(data)
-
- # Print the strategies
- print(strategies)
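The new version of app.py drops the LLaMA classification scaffolding (which also referenced train_test_split, Trainer, and TrainingArguments without importing them) and replaces it with a PyTorch LSTM trained to predict the next 'close' value from the numeric option-chain columns: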
 
  import pandas as pd
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torch.utils.data import Dataset, DataLoader
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler

  # Load the data
  data = pd.read_csv('BANKNIFTY_OPTION_CHAIN_data.csv')

+ # Select the numerical features for the LSTM
+ numerical_features = ['open', 'high', 'low', 'close', 'volume', 'oi']

+ # Standardize the features
+ scaler = StandardScaler()
+ data[numerical_features] = scaler.fit_transform(data[numerical_features])

+ # Create a custom dataset class for our data
+ class BankNiftyDataset(Dataset):
+     def __init__(self, data, seq_len, numerical_features):
+         self.data = data
+         self.seq_len = seq_len
+         self.numerical_features = numerical_features
+
+     def __len__(self):
+         return len(self.data) - self.seq_len
+
+     def __getitem__(self, idx):
+         seq_data = self.data.iloc[idx:idx+self.seq_len][self.numerical_features].values
+         label = self.data['close'].iloc[idx+self.seq_len]
+         return {
+             'features': torch.tensor(seq_data, dtype=torch.float32),
+             'label': torch.tensor(label, dtype=torch.float32)
+         }

+ # Create data loaders for training and testing
+ seq_len = 10
+ batch_size = 32
+ train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
+ train_dataset = BankNiftyDataset(train_data, seq_len, numerical_features)
+ val_dataset = BankNiftyDataset(val_data, seq_len, numerical_features)
+ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

+ # Define the LSTM-RNN model
+ class LSTMModel(nn.Module):
+     def __init__(self, input_dim, hidden_dim, output_dim):
+         super(LSTMModel, self).__init__()
+         self.hidden_dim = hidden_dim
+         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
+         self.fc = nn.Linear(hidden_dim, output_dim)
+
+     def forward(self, x):
+         # Zero-initialize the hidden and cell states for each batch
+         h0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
+         c0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
+         out, _ = self.lstm(x, (h0, c0))
+         # Use only the last time step's output for the prediction
+         out = self.fc(out[:, -1, :])
+         return out
+
+ # Initialize the model, optimizer, and loss function
+ input_dim = len(numerical_features)  # Number of numerical features
+ model = LSTMModel(input_dim=input_dim, hidden_dim=128, output_dim=1)
+ optimizer = optim.Adam(model.parameters(), lr=0.001)
+ criterion = nn.MSELoss()
+
+ # Train the model
+ for i in range(10):
+     model.train()
+     for batch in train_loader:
+         features = batch['features']
+         label = batch['label'].unsqueeze(1)
+
+         optimizer.zero_grad()
+         output = model(features)
+         loss = criterion(output, label)
+         loss.backward()
+         optimizer.step()
+
+     # Evaluate the model on the validation set
+     model.eval()
+     total_loss = 0
+     with torch.no_grad():
+         for batch in val_loader:
+             features = batch['features']
+             label = batch['label'].unsqueeze(1)
+             output = model(features)
+             loss = criterion(output, label)
+             total_loss += loss.item()
+     print(f'Epoch {i+1}, Val Loss: {total_loss / len(val_loader)}')
+
+ # Use the final trained model to generate strategies
  def generate_strategies(data):
+     seq_data = data.iloc[-seq_len:][numerical_features].values
+     features = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
+     output = model(features)
+     return output.item()

  strategies = generate_strategies(data)
+ print(f'Suggested strategy output: {strategies}')
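Two caveats for anyone adapting this code, with a minimal sketch below. First, train_test_split shuffles rows by default, so the windows that BankNiftyDataset slices out of train_data and val_data are not contiguous in time; a chronological split is the usual choice for sequence data like this. Second, generate_strategies returns its prediction in standardized units, since the 'close' column was transformed by the StandardScaler; the scaler's fitted statistics can map the output back to price scale. The sketch assumes the objects defined in app.py above (data, model, scaler, seq_len, numerical_features); predict_next_close and close_idx are illustrative names, not part of the commit.

import torch

# Chronological 80/20 split: earlier rows for training, later rows for validation
split = int(len(data) * 0.8)
train_data = data.iloc[:split]
val_data = data.iloc[split:]

def predict_next_close(data):
    """Hypothetical helper: predict the next close in price units."""
    model.eval()  # switch off training-mode behaviour for inference
    seq = data.iloc[-seq_len:][numerical_features].values
    features = torch.tensor(seq, dtype=torch.float32).unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        scaled_pred = model(features).item()
    # Invert the StandardScaler for the 'close' column only
    close_idx = numerical_features.index('close')
    return scaled_pred * scaler.scale_[close_idx] + scaler.mean_[close_idx]

Called as predict_next_close(data), this returns a value on the same scale as the raw 'close' column, which is easier to sanity-check than the standardized number printed above.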